max@1: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ max@1: max@1: /* max@1: QM Vamp Plugin Set max@1: max@1: Centre for Digital Music, Queen Mary, University of London. max@1: max@1: This program is free software; you can redistribute it and/or max@1: modify it under the terms of the GNU General Public License as max@1: published by the Free Software Foundation; either version 2 of the max@1: License, or (at your option) any later version. See the file max@1: COPYING included with this distribution for more information. max@1: */ max@1: max@1: #include "SongParts.h" max@1: max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include max@1: #include "armadillo" max@1: #include max@1: #include max@1: #include max@1: #include max@1: max@1: #include max@1: max@1: using namespace boost::numeric; max@1: using namespace arma; max@1: using std::string; max@1: using std::vector; max@1: using std::cerr; max@1: using std::cout; max@1: using std::endl; max@1: max@1: max@1: #ifndef __GNUC__ max@1: #include max@1: #endif max@1: max@1: max@1: // Result Struct max@1: typedef struct Part { max@1: int n; max@1: vector indices; max@1: string letter; max@1: unsigned value; max@1: int level; max@1: int nInd; max@1: }Part; max@1: max@1: max@1: /* ------------------------------------ */ max@1: /* ----- BEAT DETECTOR CLASS ---------- */ max@1: /* ------------------------------------ */ max@1: max@1: class BeatTrackerData max@1: { max@1: /* --- ATTRIBUTES --- */ max@1: public: max@1: DFConfig dfConfig; max@1: DetectionFunction *df; max@1: DownBeat *downBeat; max@1: vector dfOutput; max@1: Vamp::RealTime origin; max@1: max@1: max@1: /* --- METHODS --- */ max@1: max@1: /* --- Constructor --- */ max@1: public: max@1: BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) { max@1: max@1: df = new DetectionFunction(config); max@1: // 
decimation factor aims at resampling to c. 3KHz; must be power of 2 max@1: int factor = MathUtilities::nextPowerOfTwo(rate / 3000); max@1: // std::cerr << "BeatTrackerData: factor = " << factor << std::endl; max@1: downBeat = new DownBeat(rate, factor, config.stepSize); max@1: } max@1: max@1: /* --- Desctructor --- */ max@1: ~BeatTrackerData() { max@1: delete df; max@1: delete downBeat; max@1: } max@1: max@1: void reset() { max@1: delete df; max@1: df = new DetectionFunction(dfConfig); max@1: dfOutput.clear(); max@1: downBeat->resetAudioBuffer(); max@1: origin = Vamp::RealTime::zeroTime; max@1: } max@1: }; max@1: max@1: max@1: /* --------------------------------------- */ max@1: /* ----- CHROMA EXTRACTOR CLASS ---------- */ max@1: /* --------------------------------------- */ max@1: max@1: class ChromaData max@1: { max@1: max@1: /* --- ATTRIBUTES --- */ max@1: max@1: public: max@1: int frameCount; max@1: int nBPS; max@1: Vamp::Plugin::FeatureList logSpectrum; max@1: size_t blockSize; max@1: int lengthOfNoteIndex; max@1: vector meanTunings; max@1: vector localTunings; max@1: float whitening; max@1: float preset; max@1: float useNNLS; max@1: vector localTuning; max@1: vector kernelValue; max@1: vector kernelFftIndex; max@1: vector kernelNoteIndex; max@1: float *dict; max@1: bool tuneLocal; max@1: float doNormalizeChroma; max@1: float rollon; max@1: float s; max@1: vector hw; max@1: vector sinvalues; max@1: vector cosvalues; max@1: Window window; max@1: FFTReal fft; max@1: size_t inputSampleRate; max@1: max@1: /* --- METHODS --- */ max@1: max@1: /* --- Constructor --- */ max@1: max@1: public: max@1: ChromaData(float inputSampleRate, size_t block_size) : max@1: frameCount(0), max@1: nBPS(3), max@1: logSpectrum(0), max@1: blockSize(0), max@1: lengthOfNoteIndex(0), max@1: meanTunings(0), max@1: localTunings(0), max@1: whitening(1.0), max@1: preset(0.0), max@1: useNNLS(1.0), max@1: localTuning(0.0), max@1: kernelValue(0), max@1: kernelFftIndex(0), max@1: 
kernelNoteIndex(0), max@1: dict(0), max@1: tuneLocal(0.0), max@1: doNormalizeChroma(0), max@1: rollon(0.0), max@1: s(0.7), max@1: sinvalues(0), max@1: cosvalues(0), max@1: window(HanningWindow, block_size), max@1: fft(block_size), max@1: inputSampleRate(inputSampleRate) max@1: { max@1: // make the *note* dictionary matrix max@1: dict = new float[nNote * 84]; max@1: for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0; max@1: blockSize = block_size; max@1: } max@1: max@1: max@1: /* --- Desctructor --- */ max@1: max@1: ~ChromaData() { max@1: delete [] dict; max@1: } max@1: max@1: /* --- Public Methods --- */ max@1: max@1: void reset() { max@1: frameCount = 0; max@1: logSpectrum.clear(); max@1: for (int iBPS = 0; iBPS < 3; ++iBPS) { max@1: meanTunings[iBPS] = 0; max@1: localTunings[iBPS] = 0; max@1: } max@1: localTuning.clear(); max@1: } max@1: max@1: void baseProcess(float *inputBuffers, Vamp::RealTime timestamp) max@1: { max@1: max@1: frameCount++; max@1: float *magnitude = new float[blockSize/2]; max@1: double *fftReal = new double[blockSize]; max@1: double *fftImag = new double[blockSize]; max@1: max@1: // FFTReal wants doubles, so we need to make a local copy of inputBuffers max@1: double *inputBuffersDouble = new double[blockSize]; max@1: for (size_t i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i]; max@1: max@1: fft.process(false, inputBuffersDouble, fftReal, fftImag); max@1: max@1: float energysum = 0; max@1: // make magnitude max@1: float maxmag = -10000; max@1: for (int iBin = 0; iBin < static_cast(blockSize/2); iBin++) { max@1: magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] + max@1: fftImag[iBin] * fftImag[iBin]); max@1: if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize; max@1: // a valid audio signal (between -1 and 1) should not be limited here. 
max@1: if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; max@1: if (rollon > 0) { max@1: energysum += pow(magnitude[iBin],2); max@1: } max@1: } max@1: max@1: float cumenergy = 0; max@1: if (rollon > 0) { max@1: for (int iBin = 2; iBin < static_cast(blockSize/2); iBin++) { max@1: cumenergy += pow(magnitude[iBin],2); max@1: if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0; max@1: else break; max@1: } max@1: } max@1: max@1: if (maxmag < 2) { max@1: // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; max@1: for (int iBin = 0; iBin < static_cast(blockSize/2); iBin++) { max@1: magnitude[iBin] = 0; max@1: } max@1: } max@1: max@1: // cerr << magnitude[200] << endl; max@1: max@1: // note magnitude mapping using pre-calculated matrix max@1: float *nm = new float[nNote]; // note magnitude max@1: for (int iNote = 0; iNote < nNote; iNote++) { max@1: nm[iNote] = 0; // initialise as 0 max@1: } max@1: int binCount = 0; max@1: for (vector::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) { max@1: nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount]; max@1: binCount++; max@1: } max@1: max@1: float one_over_N = 1.0/frameCount; max@1: // update means of complex tuning variables max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N; max@1: max@1: for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; max@1: float ratioOld = 0.997; max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: localTunings[iBPS] *= ratioOld; max@1: localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); max@1: } max@1: } max@1: max@1: float localTuningImag = 0; max@1: float localTuningReal = 0; max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: localTuningReal += localTunings[iBPS] * cosvalues[iBPS]; max@1: 
localTuningImag += localTunings[iBPS] * sinvalues[iBPS]; max@1: } max@1: max@1: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); max@1: localTuning.push_back(normalisedtuning); max@1: max@1: Vamp::Plugin::Feature f1; // logfreqspec max@1: f1.hasTimestamp = true; max@1: f1.timestamp = timestamp; max@1: for (int iNote = 0; iNote < nNote; iNote++) { max@1: f1.values.push_back(nm[iNote]); max@1: } max@1: max@1: // deletes max@1: delete[] inputBuffersDouble; max@1: delete[] magnitude; max@1: delete[] fftReal; max@1: delete[] fftImag; max@1: delete[] nm; max@1: max@1: logSpectrum.push_back(f1); // remember note magnitude max@1: } max@1: max@1: bool initialise() max@1: { max@1: dictionaryMatrix(dict, s); max@1: max@1: // make things for tuning estimation max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); max@1: cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); max@1: } max@1: max@1: max@1: // make hamming window of length 1/2 octave max@1: int hamwinlength = nBPS * 6 + 1; max@1: float hamwinsum = 0; max@1: for (int i = 0; i < hamwinlength; ++i) { max@1: hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1))); max@1: hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); max@1: } max@1: for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; max@1: max@1: max@1: // initialise the tuning max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: meanTunings.push_back(0); max@1: localTunings.push_back(0); max@1: } max@1: max@1: blockSize = blockSize; max@1: frameCount = 0; max@1: int tempn = nNote * blockSize/2; max@1: // cerr << "length of tempkernel : " << tempn << endl; max@1: float *tempkernel; max@1: max@1: tempkernel = new float[tempn]; max@1: max@1: logFreqMatrix(inputSampleRate, blockSize, tempkernel); max@1: kernelValue.clear(); max@1: kernelFftIndex.clear(); max@1: kernelNoteIndex.clear(); max@1: int countNonzero = 0; max@1: for (int iNote = 0; iNote < nNote; ++iNote) 
{ max@1: // I don't know if this is wise: manually making a sparse matrix max@1: for (int iFFT = 0; iFFT < static_cast(blockSize/2); ++iFFT) { max@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { max@1: kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); max@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { max@1: countNonzero++; max@1: } max@1: kernelFftIndex.push_back(iFFT); max@1: kernelNoteIndex.push_back(iNote); max@1: } max@1: } max@1: } max@1: delete [] tempkernel; max@1: } max@1: }; max@1: max@1: max@1: /* --------------------------------- */ max@1: /* ----- SONG PARTITIONER ---------- */ max@1: /* --------------------------------- */ max@1: max@1: max@1: /* --- ATTRIBUTES --- */ max@1: max@1: float SongPartitioner::m_stepSecs = 0.01161; // 512 samples at 44100 max@1: size_t SongPartitioner::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's max@1: size_t SongPartitioner::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's max@1: max@1: max@1: /* --- METHODS --- */ max@1: max@1: /* --- Constructor --- */ max@1: SongPartitioner::SongPartitioner(float inputSampleRate) : max@1: Vamp::Plugin(inputSampleRate), max@1: m_d(0), max@1: m_bpb(4), max@1: m_pluginFrameCount(0) max@1: { max@1: } max@1: max@1: max@1: /* --- Desctructor --- */ max@1: SongPartitioner::~SongPartitioner() max@1: { max@1: delete m_d; max@1: } max@1: max@1: max@1: /* --- Methods --- */ max@1: string SongPartitioner::getIdentifier() const max@1: { max@1: return "qm-songpartitioner"; max@1: } max@1: max@1: string SongPartitioner::getName() const max@1: { max@1: return "Song Partitioner"; max@1: } max@1: max@1: string SongPartitioner::getDescription() const max@1: { max@1: return "Estimate contiguous segments pertaining to song parts such as verse and chorus."; max@1: } max@1: max@1: string SongPartitioner::getMaker() const max@1: { max@1: return "Queen Mary, University of London"; max@1: } max@1: max@1: int SongPartitioner::getPluginVersion() const 
max@1: { max@1: return 2; max@1: } max@1: max@1: string SongPartitioner::getCopyright() const max@1: { max@1: return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved"; max@1: } max@1: max@1: SongPartitioner::ParameterList SongPartitioner::getParameterDescriptors() const max@1: { max@1: ParameterList list; max@1: max@1: ParameterDescriptor desc; max@1: max@1: desc.identifier = "bpb"; max@1: desc.name = "Beats per Bar"; max@1: desc.description = "The number of beats in each bar"; max@1: desc.minValue = 2; max@1: desc.maxValue = 16; max@1: desc.defaultValue = 4; max@1: desc.isQuantized = true; max@1: desc.quantizeStep = 1; max@1: list.push_back(desc); max@1: max@1: return list; max@1: } max@1: max@1: float SongPartitioner::getParameter(std::string name) const max@1: { max@1: if (name == "bpb") return m_bpb; max@1: return 0.0; max@1: } max@1: max@1: void SongPartitioner::setParameter(std::string name, float value) max@1: { max@1: if (name == "bpb") m_bpb = lrintf(value); max@1: } max@1: max@1: max@1: // Return the StepSize for Chroma Extractor max@1: size_t SongPartitioner::getPreferredStepSize() const max@1: { max@1: size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); max@1: if (step < 1) step = 1; max@1: max@1: return step; max@1: } max@1: max@1: // Return the BlockSize for Chroma Extractor max@1: size_t SongPartitioner::getPreferredBlockSize() const max@1: { max@1: size_t theoretical = getPreferredStepSize() * 2; max@1: theoretical *= m_chromaFramesizeFactor; max@1: max@1: return theoretical; max@1: } max@1: max@1: max@1: // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects max@1: bool SongPartitioner::initialise(size_t channels, size_t stepSize, size_t blockSize) max@1: { max@1: if (m_d) { max@1: delete m_d; max@1: m_d = 0; max@1: } max@1: max@1: if (channels < getMinChannelCount() || max@1: channels > getMaxChannelCount()) { 
max@1: std::cerr << "SongPartitioner::initialise: Unsupported channel count: " max@1: << channels << std::endl; max@1: return false; max@1: } max@1: max@1: if (stepSize != getPreferredStepSize()) { max@1: std::cerr << "ERROR: SongPartitioner::initialise: Unsupported step size for this sample rate: " max@1: << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl; max@1: return false; max@1: } max@1: max@1: if (blockSize != getPreferredBlockSize()) { max@1: std::cerr << "WARNING: SongPartitioner::initialise: Sub-optimal block size for this sample rate: " max@1: << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl; max@1: } max@1: max@1: // Beat tracker and Chroma extractor has two different configuration parameters max@1: max@1: // Configuration Parameters for Beat Tracker max@1: DFConfig dfConfig; max@1: dfConfig.DFType = DF_COMPLEXSD; max@1: dfConfig.stepSize = stepSize; max@1: dfConfig.frameLength = blockSize / m_chromaFramesizeFactor; max@1: dfConfig.dbRise = 3; max@1: dfConfig.adaptiveWhitening = false; max@1: dfConfig.whiteningRelaxCoeff = -1; max@1: dfConfig.whiteningFloor = -1; max@1: max@1: // Initialise Beat Tracker max@1: m_d = new BeatTrackerData(m_inputSampleRate, dfConfig); max@1: m_d->downBeat->setBeatsPerBar(m_bpb); max@1: max@1: // Initialise Chroma Extractor max@1: m_chromadata = new ChromaData(m_inputSampleRate, blockSize); max@1: m_chromadata->initialise(); max@1: max@1: return true; max@1: } max@1: max@1: void SongPartitioner::reset() max@1: { max@1: if (m_d) m_d->reset(); max@1: m_pluginFrameCount = 0; max@1: } max@1: max@1: SongPartitioner::OutputList SongPartitioner::getOutputDescriptors() const max@1: { max@1: OutputList list; max@1: size_t outputCounter = 0; max@1: max@1: OutputDescriptor beat; max@1: beat.identifier = "beats"; max@1: beat.name = "Beats"; max@1: beat.description = "Beat locations labelled with metrical position"; max@1: beat.unit = ""; max@1: beat.hasFixedBinCount = true; max@1: 
beat.binCount = 0; max@1: beat.sampleType = OutputDescriptor::VariableSampleRate; max@1: beat.sampleRate = 1.0 / m_stepSecs; max@1: m_beatOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor bars; max@1: bars.identifier = "bars"; max@1: bars.name = "Bars"; max@1: bars.description = "Bar locations"; max@1: bars.unit = ""; max@1: bars.hasFixedBinCount = true; max@1: bars.binCount = 0; max@1: bars.sampleType = OutputDescriptor::VariableSampleRate; max@1: bars.sampleRate = 1.0 / m_stepSecs; max@1: m_barsOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor beatcounts; max@1: beatcounts.identifier = "beatcounts"; max@1: beatcounts.name = "Beat Count"; max@1: beatcounts.description = "Beat counter function"; max@1: beatcounts.unit = ""; max@1: beatcounts.hasFixedBinCount = true; max@1: beatcounts.binCount = 1; max@1: beatcounts.sampleType = OutputDescriptor::VariableSampleRate; max@1: beatcounts.sampleRate = 1.0 / m_stepSecs; max@1: m_beatcountsOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor beatsd; max@1: beatsd.identifier = "beatsd"; max@1: beatsd.name = "Beat Spectral Difference"; max@1: beatsd.description = "Beat spectral difference function used for bar-line detection"; max@1: beatsd.unit = ""; max@1: beatsd.hasFixedBinCount = true; max@1: beatsd.binCount = 1; max@1: beatsd.sampleType = OutputDescriptor::VariableSampleRate; max@1: beatsd.sampleRate = 1.0 / m_stepSecs; max@1: m_beatsdOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor logscalespec; max@1: logscalespec.identifier = "logscalespec"; max@1: logscalespec.name = "Log-Frequency Spectrum"; max@1: logscalespec.description = "Spectrum with linear frequency on a log scale."; max@1: logscalespec.unit = ""; max@1: logscalespec.hasFixedBinCount = true; max@1: logscalespec.binCount = nNote; max@1: logscalespec.hasKnownExtents = false; max@1: logscalespec.isQuantized = false; max@1: logscalespec.sampleType = OutputDescriptor::FixedSampleRate; max@1: 
logscalespec.hasDuration = false; max@1: logscalespec.sampleRate = m_inputSampleRate/2048; max@1: m_logscalespecOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor bothchroma; max@1: bothchroma.identifier = "bothchroma"; max@1: bothchroma.name = "Chromagram and Bass Chromagram"; max@1: bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription."; max@1: bothchroma.unit = ""; max@1: bothchroma.hasFixedBinCount = true; max@1: bothchroma.binCount = 24; max@1: bothchroma.hasKnownExtents = false; max@1: bothchroma.isQuantized = false; max@1: bothchroma.sampleType = OutputDescriptor::FixedSampleRate; max@1: bothchroma.hasDuration = false; max@1: bothchroma.sampleRate = m_inputSampleRate/2048; max@1: m_bothchromaOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor qchromafw; max@1: qchromafw.identifier = "qchromafw"; max@1: qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram"; max@1: qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical)."; max@1: qchromafw.unit = ""; max@1: qchromafw.hasFixedBinCount = true; max@1: qchromafw.binCount = 24; max@1: qchromafw.hasKnownExtents = false; max@1: qchromafw.isQuantized = false; max@1: qchromafw.sampleType = OutputDescriptor::FixedSampleRate; max@1: qchromafw.hasDuration = false; max@1: qchromafw.sampleRate = m_inputSampleRate/2048; max@1: m_qchromafwOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor qchroma; max@1: qchroma.identifier = "qchroma"; max@1: qchroma.name = "Quantised Chromagram and Bass Chromagram"; max@1: qchroma.description = "Quantised Chromagram and Bass Chromagram."; max@1: qchroma.unit = ""; max@1: qchroma.hasFixedBinCount = true; max@1: qchroma.binCount = 24; max@1: qchroma.hasKnownExtents = false; max@1: qchroma.isQuantized = false; max@1: qchroma.sampleType = OutputDescriptor::FixedSampleRate; max@1: qchroma.hasDuration = 
true; max@1: m_qchromaOutputNumber = outputCounter++; max@1: max@1: OutputDescriptor segm; max@1: segm.identifier = "segm"; max@1: segm.name = "Segmentation"; max@1: segm.description = "Segmentation"; max@1: segm.unit = "segment-type"; max@1: segm.hasFixedBinCount = true; max@1: //segm.binCount = 24; max@1: segm.binCount = 1; max@1: segm.hasKnownExtents = true; max@1: segm.minValue = 1; max@1: segm.maxValue = 5; max@1: segm.isQuantized = true; max@1: segm.quantizeStep = 1; max@1: segm.sampleType = OutputDescriptor::VariableSampleRate; max@1: segm.hasDuration = true; max@1: m_segmOutputNumber = outputCounter++; max@1: max@1: max@1: /* max@1: OutputList list; max@1: OutputDescriptor segmentation; max@1: segmentation.identifier = "segmentation"; max@1: segmentation.name = "Segmentation"; max@1: segmentation.description = "Segmentation"; max@1: segmentation.unit = "segment-type"; max@1: segmentation.hasFixedBinCount = true; max@1: segmentation.binCount = 1; max@1: segmentation.hasKnownExtents = true; max@1: segmentation.minValue = 1; max@1: segmentation.maxValue = nSegmentTypes; max@1: segmentation.isQuantized = true; max@1: segmentation.quantizeStep = 1; max@1: segmentation.sampleType = OutputDescriptor::VariableSampleRate; max@1: segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize(); max@1: list.push_back(segmentation); max@1: return list; max@1: */ max@1: max@1: max@1: list.push_back(beat); max@1: list.push_back(bars); max@1: list.push_back(beatcounts); max@1: list.push_back(beatsd); max@1: list.push_back(logscalespec); max@1: list.push_back(bothchroma); max@1: list.push_back(qchromafw); max@1: list.push_back(qchroma); max@1: list.push_back(segm); max@1: max@1: return list; max@1: } max@1: max@1: // Executed for each frame - called from the host max@1: max@1: // We use time domain input, because DownBeat requires it -- so we max@1: // use the time-domain version of DetectionFunction::process which max@1: // does its own FFT. 
// It requires doubles as input, so we need to
// make a temporary copy

// We only support a single input channel

// Feed one block of audio to the beat tracker and, on every
// m_chromaStepsizeFactor-th call, to the chroma extractor as well.  Returns
// the new log-frequency frame on the calls that produce one, and an empty
// FeatureSet otherwise (or if the plugin has not been initialised).
SongPartitioner::FeatureSet SongPartitioner::process(const float *const *inputBuffers,Vamp::RealTime timestamp)
{
    if (!m_d) {
        cerr << "ERROR: SongPartitioner::process: "
             << "SongPartitioner has not been initialised"
             << endl;
        return FeatureSet();
    }

    const int fl = m_d->dfConfig.frameLength;
#ifndef __GNUC__
    double *dfinput = (double *)alloca(fl * sizeof(double));
#else
    double dfinput[fl];
#endif
    const int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2;

    // Since chroma needs a much longer frame size, we only ever use the very
    // beginning of the frame for beat tracking.
    for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i];
    const double output = m_d->df->process(dfinput);

    if (m_d->dfOutput.empty()) m_d->origin = timestamp;
    m_d->dfOutput.push_back(output);

    // Downsample and store the incoming audio block.
    // We have an overlap on the incoming audio stream (step size is
    // half block size) -- this function is configured to take only a
    // step size's worth, so effectively ignoring the overlap.  Note
    // however that this means we omit the last blocksize - stepsize
    // samples completely for the purposes of barline detection
    // (hopefully not a problem)
    m_d->downBeat->pushAudioBlock(inputBuffers[0]);

    FeatureSet fs;

    // The following is not done every time, but only every
    // m_chromaStepsizeFactor times, because the chroma does not need dense
    // time frames.
    if (m_pluginFrameCount % m_chromaStepsizeFactor == 0)
    {
        // Window the full time domain, data, FFT it and process chroma stuff.
#ifndef __GNUC__
        float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float));
#else
        float windowedBuffers[m_chromadata->blockSize];
#endif
        m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]);

        // adjust timestamp (we want the middle of the frame)
        timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate));

        m_chromadata->baseProcess(&windowedBuffers[0], timestamp);

        // Report the frame just computed.  Doing this outside the branch
        // would re-emit the previous frame, with the same timestamp, on the
        // calls that compute nothing new.
        fs[m_logscalespecOutputNumber].push_back(
            m_chromadata->logSpectrum.back());
    }
    m_pluginFrameCount++;

    return fs;
}

// Run the whole-song analyses once all audio has been seen: beat tracking,
// chroma extraction, beat quantisation of the chroma, and segmentation.
// Returns an empty FeatureSet if the plugin has not been initialised.
SongPartitioner::FeatureSet SongPartitioner::getRemainingFeatures()
{
    if (!m_d) {
        cerr << "ERROR: SongPartitioner::getRemainingFeatures: "
             << "SongPartitioner has not been initialised"
             << endl;
        return FeatureSet();
    }

    FeatureSet masterFeatureset = BeatTrack();
    FeatureList chromaList = ChromaFeatures();

    FeatureList &bothChromaOut = masterFeatureset[m_bothchromaOutputNumber];
    bothChromaOut.insert(bothChromaOut.end(), chromaList.begin(), chromaList.end());

    // quantised and pseudo-quantised (beat-wise) chroma
    std::vector<FeatureList> quantisedChroma = BeatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]);

    // BeatQuantiser returns an empty vector when there are neither beats nor
    // chroma frames; indexing it in that case would read out of range.
    if (quantisedChroma.size() < 2) return masterFeatureset;

    masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0];
    masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1];

    // Segmentation
    masterFeatureset[m_segmOutputNumber] = Segmenter(quantisedChroma[1]);

    return(masterFeatureset);
}

/* ------ Beat Tracker ------ */

// Turn the collected detection function into beats, bars, beat counts and
// the beat spectral difference.  Returns an empty FeatureSet when fewer than
// three detection-function values have been collected.
SongPartitioner::FeatureSet SongPartitioner::BeatTrack()
{
    // discard first two elts
    vector<double> df;
    if (m_d->dfOutput.size() > 2) {
        df.assign(m_d->dfOutput.begin() + 2, m_d->dfOutput.end());
    }
    if (df.empty()) return FeatureSet();

    vector<double> beatPeriod(df.size(), 0.0);
    vector<double> tempi;

    TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
    tt.calculateBeatPeriod(df, beatPeriod, tempi);

    vector<double> beats;
    tt.calculateBeats(df, beatPeriod, beats);

    vector<int> downbeats;
    size_t downLength = 0;
    const float *downsampled = m_d->downBeat->getBufferedAudio(downLength);
    m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats);

    vector<double> beatsd;
    m_d->downBeat->getBeatSD(beatsd);

    FeatureSet returnFeatures;

    char label[20];

    size_t dbi = 0; // index of the next downbeat still to be reached
    int beat = 0;
    int bar = 0;

    if (!downbeats.empty()) {
        // get the right number for the first beat; this will be
        // incremented before use (at top of the following loop)
        const int firstDown = downbeats[0];
        beat = m_bpb - firstDown - 1;
        if (beat == m_bpb) beat = 0;
    }

    for (size_t i = 0; i < beats.size(); ++i) {

        const size_t frame = beats[i] * m_d->dfConfig.stepSize;

        if (dbi < downbeats.size() && i == static_cast<size_t>(downbeats[dbi])) {
            beat = 0;
            ++bar;
            ++dbi;
        } else {
            ++beat;
        }

        /* Output Section */

        // The same Feature object is reused for all four outputs; its label
        // and values are rewritten between the push_backs below.

        Feature feature;
        feature.hasTimestamp = true;
        feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate));

        snprintf(label, sizeof(label), "%d", beat + 1);
        feature.label = label;
        returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats

        feature.values.push_back(beat + 1);
        returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function

        if (i > 0 && i <= beatsd.size()) {
            feature.values.clear();
            feature.values.push_back(beatsd[i-1]);
            feature.label = "";
            returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference
        }

        if (beat == 0) {
            feature.values.clear();
            snprintf(label, sizeof(label), "%d", bar);
            feature.label = label;
            returnFeatures[m_barsOutputNumber].push_back(feature); // bars
        }
    }

    return returnFeatures;
}


/* ------ Chroma Extractor ------ */

// Convert the collected log-frequency spectrum into one 24-bin feature per
// frame: the bass chromagram (12 bins) followed by the treble chromagram
// (12 bins).  Returns an empty list when no frames have been collected.
SongPartitioner::FeatureList SongPartitioner::ChromaFeatures()
{
    FeatureList returnFeatureList;
    FeatureList tunedlogfreqspec;

    if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList;

    /**  Calculate Tuning
         calculate tuning from (using the angle of the complex number defined by the
         cumulative mean real and imag values)
    **/
    float meanTuningImag = 0;
    float meanTuningReal = 0;
    for (int iBPS = 0; iBPS < nBPS; ++iBPS) {
        meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS];
        meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS];
    }
    const float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI);
    int intShift = floor(normalisedtuning * 3);                 // whole bins to shift by
    float floatShift = normalisedtuning * 3 - intShift;         // interpolation weight of the upper bin

    /** Tune Log-Frequency Spectrogram
        calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to
        perform linear interpolation on the existing log-frequency spectrogram (kinda f1).
    **/
    cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... ";

    size_t frameIndex = 0;
    for (FeatureList::const_iterator it = m_chromadata->logSpectrum.begin();
         it != m_chromadata->logSpectrum.end(); ++it, ++frameIndex)
    {
        const Feature &f1 = *it;
        Feature f2; // tuned log-frequency spectrum

        f2.hasTimestamp = true;
        f2.timestamp = f1.timestamp;

        f2.values.push_back(0.0);
        f2.values.push_back(0.0); // set lower edge to zero

        if (m_chromadata->tuneLocal) {
            intShift = floor(m_chromadata->localTuning[frameIndex] * 3);
            floatShift = m_chromadata->localTuning[frameIndex] * 3 - intShift;
        }

        for (int k = 2; k < (int)f1.values.size() - 3; ++k)
        { // interpolate all inner bins
            const float interpolated = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift;
            f2.values.push_back(interpolated);
        }

        f2.values.push_back(0.0);
        f2.values.push_back(0.0);
        f2.values.push_back(0.0); // upper edge

        // Subtract the running mean and divide by the running standard
        // deviation, both taken over the smoothing window hw.
        vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw);
        vector<float> runningstd;
        for (int iNote = 0; iNote < nNote; iNote++) { // first step: squared values into vector (variance)
            const float deviation = f2.values[iNote] - runningmean[iNote];
            runningstd.push_back(deviation * deviation);
        }
        runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve
        for (int iNote = 0; iNote < nNote; iNote++)
        {
            runningstd[iNote] = sqrt(runningstd[iNote]);
            // square root to finally have running std

            if (runningstd[iNote] > 0)
            {
                f2.values[iNote] = (f2.values[iNote] - runningmean[iNote]) > 0 ?
                    (f2.values[iNote] - runningmean[iNote]) / pow(runningstd[iNote],m_chromadata->whitening) : 0;
            }

            if (f2.values[iNote] < 0) {
                cerr << "ERROR: negative value in logfreq spectrum" << endl;
            }
        }
        tunedlogfreqspec.push_back(f2);
    }
    cerr << "done." << endl;

    /** Semitone spectrum and chromagrams
        Semitone-spaced log-frequency spectrum derived
        from the tuned log-freq spectrum above. the spectrum
        is inferred using a non-negative least squares algorithm.
        Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means
        bass and treble stacked onto each other).
    **/
    if (m_chromadata->useNNLS == 0) {
        cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... ";
    } else {
        cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... ";
    }

    for (FeatureList::const_iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) {
        const Feature &logfreqsp = *it; // logfreq spectrum
        Feature bothchroma; // treble and bass chromagram

        bothchroma.hasTimestamp = true;
        bothchroma.timestamp = logfreqsp.timestamp;

        // Local copy of the frame.
        // NOTE(review): a copy is presumably needed because nnls() writes to
        // its b argument -- confirm against the nnls implementation.
        float b[nNote];
        bool some_b_greater_zero = false;
        for (int i = 0; i < nNote; i++) {
            b[i] = logfreqsp.values[i];
            if (b[i] > 0) {
                some_b_greater_zero = true;
            }
        }

        // here's where the non-negative least squares algorithm calculates the note activation x

        vector<float> chroma = vector<float>(12, 0);
        vector<float> basschroma = vector<float>(12, 0);

        if (some_b_greater_zero) {
            if (m_chromadata->useNNLS == 0) {
                // Plain mapping: triangular weighting of the bins around
                // each semitone centre.
                int iSemitone = 0;
                for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
                    float currval = 0;
                    for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
                        // fabs, not abs: the integer overload of abs would
                        // truncate the fractional weight to 0.
                        currval += b[iNote + iBPS] * (1-fabs(iBPS*1.0/(nBPS/2+1)));
                    }
                    chroma[iSemitone % 12] += currval * treblewindow[iSemitone];
                    basschroma[iSemitone % 12] += currval * basswindow[iSemitone];
                    iSemitone++;
                }

            } else {
                float x[84+1000];
                for (int i = 0; i < 1084; ++i) x[i] = 1.0;

                // Semitones with any energy at all; only their dictionary
                // columns take part in the fit.
                vector<int> signifIndex;
                int index = 0;
                for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) {
                    float currval = 0;
                    for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) {
                        currval += b[iNote + iBPS];
                    }
                    if (currval > 0) signifIndex.push_back(index);
                    index++;
                }

                if (!signifIndex.empty()) {
                    float rnorm;
                    float w[84+1000];
                    float zz[84+1000];
                    int indx[84+1000];
                    int mode;

                    vector<float> curr_dict(nNote * signifIndex.size());
                    for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
                        for (int iBin = 0; iBin < nNote; iBin++) {
                            curr_dict[iNote * nNote + iBin] =
                                1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin];
                        }
                    }
                    nnls(&curr_dict[0], nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode);

                    for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) {
                        chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]];
                        basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]];
                    }
                }
            }
        }

        // just stack the both chromas: bass first, then treble
        chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end());

        bothchroma.values = chroma;
        returnFeatureList.push_back(bothchroma);
    }
    cerr << "done." << endl;

    return returnFeatureList;
}

/* ------ Beat Quantizer ------ */ max@1: max@4: std::vector max@4: SongPartitioner::BeatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats) max@1: { max@1: std::vector returnVector; max@1: max@1: FeatureList fwQchromagram; // frame-wise beat-quantised chroma max@1: FeatureList bwQchromagram; // beat-wise beat-quantised chroma max@1: max@4: int nChromaFrame = (int) chromagram.size(); max@4: int nBeat = (int) beats.size(); max@1: max@1: if (nBeat == 0 && nChromaFrame == 0) return returnVector; max@1: max@1: size_t nBin = chromagram[0].values.size(); max@1: max@1: vector tempChroma = vector(nBin); max@1: max@1: Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime; max@1: int currBeatCount = -1; // start before first beat max@1: int framesInBeat = 0; max@1: max@4: for (int iChroma = 0; iChroma < nChromaFrame; ++iChroma) max@1: { max@4: Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp; max@4: Vamp::RealTime tempBeatTimestamp; max@4: max@4: if (currBeatCount != beats.size()-1) tempBeatTimestamp = beats[currBeatCount+1].timestamp; max@4: else tempBeatTimestamp = chromagram[nChromaFrame-1].timestamp; max@4: max@4: if (frameTimestamp > tempBeatTimestamp || max@1: iChroma == nChromaFrame-1) max@1: { max@1: // new beat (or last chroma frame) max@1: // 1. 
finish all the old beat processing max@4: if (framesInBeat > 0) max@4: { max@4: for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average max@4: } max@1: max@1: Feature bwQchromaFrame; max@1: bwQchromaFrame.hasTimestamp = true; max@1: bwQchromaFrame.timestamp = beatTimestamp; max@1: bwQchromaFrame.values = tempChroma; max@1: bwQchromaFrame.duration = beats[currBeatCount+1].timestamp - beats[currBeatCount].timestamp; max@1: bwQchromagram.push_back(bwQchromaFrame); max@1: max@1: for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame) max@1: { max@1: Feature fwQchromaFrame; max@1: fwQchromaFrame.hasTimestamp = true; max@1: fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp; max@1: fwQchromaFrame.values = tempChroma; // all between two beats get the same max@1: fwQchromagram.push_back(fwQchromaFrame); max@1: } max@1: max@1: // 2. increments / resets for current (new) beat max@1: currBeatCount++; max@1: beatTimestamp = beats[currBeatCount].timestamp; max@1: for (size_t i = 0; i < nBin; ++i) tempChroma[i] = 0; // average max@1: framesInBeat = 0; max@1: } max@1: framesInBeat++; max@1: for (size_t i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i]; max@1: } max@1: returnVector.push_back(fwQchromagram); max@1: returnVector.push_back(bwQchromagram); max@1: } max@1: max@1: /* -------------------------------- */ max@1: /* ------ Support Functions ------ */ max@1: /* -------------------------------- */ max@1: max@1: // one-dimesion median filter max@1: arma::vec medfilt1(arma::vec v, int medfilt_length) max@1: { max@1: int halfWin = medfilt_length/2; max@1: max@1: // result vector max@1: arma::vec res = arma::zeros(v.size()); max@1: max@1: // padding max@1: arma::vec padV = arma::zeros(v.size()+medfilt_length-1); max@1: max@1: for (unsigned i=medfilt_length/2; i < medfilt_length/2+v.size(); ++ i) max@1: { max@1: padV(i) = v(i-medfilt_length/2); max@1: } max@1: max@1: // Median filter max@1: arma::vec win = 
arma::zeros(medfilt_length); max@1: max@1: for (unsigned i=0; i < v.size(); ++i) max@1: { max@1: win = padV.subvec(i,i+halfWin*2); max@1: win = sort(win); max@1: res(i) = win(halfWin); max@1: } max@1: max@1: return res; max@1: } max@1: max@1: max@1: // Quantile max@1: double quantile(arma::vec v, double p) max@1: { max@1: arma::vec sortV = arma::sort(v); max@1: int n = sortV.size(); max@1: arma::vec x = arma::zeros(n+2); max@1: arma::vec y = arma::zeros(n+2); max@1: max@1: x(0) = 0; max@1: x(n+1) = 100; max@1: max@1: for (unsigned i=1; i=p*100); max@1: max@1: // Interpolation max@1: double x1 = x(x2index(0)-1); max@1: double x2 = x(x2index(0)); max@1: double y1 = y(x2index(0)-1); max@1: double y2 = y(x2index(0)); max@1: max@1: double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1; max@1: max@1: return res; max@1: } max@1: max@1: // Max Filtering max@1: arma::mat maxfilt1(arma::mat inmat, int len) max@1: { max@1: arma::mat outmat = inmat; max@1: max@1: for (int i=0; i 0) max@1: { max@1: // Take a window of rows max@1: int startWin; max@1: int endWin; max@1: max@1: if (0 > i-len) max@1: startWin = 0; max@1: else max@1: startWin = i-len; max@1: max@1: if (inmat.n_rows-1 < i+len-1) max@1: endWin = inmat.n_rows-1; max@1: else max@1: endWin = i+len-1; max@1: max@1: outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all)); max@1: } max@1: } max@1: max@1: return outmat; max@1: max@1: } max@1: max@1: // Null Parts max@1: Part nullpart(vector parts, arma::vec barline) max@1: { max@1: arma::uvec nullindices = arma::ones(barline.size()); max@1: for (unsigned iPart=0; iPart 0); max@1: max@1: for (unsigned i=0; i &parts) max@1: { max@1: for (unsigned iPart=0; iPart newVectorPart; max@1: max@1: if (parts[iPart].letter.compare("-")==0) max@1: { max@1: sort (parts[iPart].indices.begin(), parts[iPart].indices.end()); max@1: unsigned newpartind = -1; max@1: max@1: vector indices; max@1: indices.push_back(-2); max@1: max@1: for (unsigned iIndex=0; iIndex 1) max@1: { max@1: 
newpartind++; max@1: max@1: Part newPart; max@1: newPart.letter = 'n'; max@1: std::stringstream out; max@1: out << newpartind+1; max@1: newPart.letter.append(out.str()); max@1: newPart.value = 20+newpartind+1; max@1: newPart.n = 1; max@1: newPart.indices.push_back(indices[iInd]); max@1: newPart.level = 0; max@1: max@1: newVectorPart.push_back(newPart); max@1: } max@1: else max@1: { max@1: newVectorPart[newpartind].n = newVectorPart[newpartind].n+1; max@1: } max@1: } max@1: parts.erase (parts.end()); max@1: max@1: for (unsigned i=0; i songSegment(Vamp::Plugin::FeatureList quatisedChromagram) max@1: { max@1: max@1: max@1: /* ------ Parameters ------ */ max@1: double thresh_beat = 0.85; max@1: double thresh_seg = 0.80; max@1: int medfilt_length = 5; max@1: int minlength = 28; max@1: int maxlength = 128; max@1: double quantilePerc = 0.1; max@1: /* ------------------------ */ max@1: max@1: max@1: // Collect Info max@1: int nBeat = quatisedChromagram.size(); // Number of feature vector max@1: int nFeatValues = quatisedChromagram[0].values.size(); // Number of values for each feature vector max@1: max@1: arma::irowvec timeStamp = arma::zeros(1,nBeat); // Vector of Time Stamps max@1: max@1: // Save time stamp as a Vector max@1: if (quatisedChromagram[0].hasTimestamp) max@1: { max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: timeStamp[i] = quatisedChromagram[i].timestamp.nsec; max@1: } max@1: max@1: max@1: // Build a ObservationTOFeatures Matrix max@1: arma::mat featVal = arma::zeros(nBeat,nFeatValues/2); max@1: max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: for (unsigned j = 0; j < nFeatValues/2; ++ j) max@1: { max@1: featVal(i,j) = (quatisedChromagram[i].values[j]+quatisedChromagram[i].values[j+12]) * 0.8; max@1: } max@1: max@1: // Set to arbitrary value to feature vectors with low std max@1: arma::mat a = stddev(featVal,1,1); max@1: max@1: // Feature Colleration Matrix max@1: arma::mat simmat0 = 1-arma::cor(arma::trans(featVal)); max@1: max@1: max@1: for 
(unsigned i = 0; i < nBeat; ++ i) max@1: { max@1: if (a(i)<0.000001) max@1: { max@1: featVal(i,1) = 1000; // arbitrary max@1: max@1: for (unsigned j = 0; j < nFeatValues/2; ++j) max@1: { max@1: simmat0(i,j) = 1; max@1: simmat0(j,i) = 1; max@1: } max@1: } max@1: } max@1: max@1: arma::mat simmat = 1-simmat0/2; max@1: max@1: // -------- To delate when the proble with the add of beat will be solved ------- max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: for (unsigned j = 0; j < nBeat; ++ j) max@1: if (!std::isfinite(simmat(i,j))) max@1: simmat(i,j)=0; max@1: // ------------------------------------------------------------------------------ max@1: max@1: // Median Filtering applied to the Correlation Matrix max@1: // The median filter is for each diagonal of the Matrix max@1: arma::mat median_simmat = arma::zeros(nBeat,nBeat); max@1: max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: { max@1: arma::vec temp = medfilt1(simmat.diag(i),medfilt_length); max@1: median_simmat.diag(i) = temp; max@1: median_simmat.diag(-i) = temp; max@1: } max@1: max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: for (unsigned j = 0; j < nBeat; ++ j) max@1: if (!std::isfinite(median_simmat(i,j))) max@1: median_simmat(i,j) = 0; max@1: max@1: // -------------- NOT CONVERTED ------------------------------------- max@1: // if param.seg.standardise max@1: // med_median_simmat = repmat(median(median_simmat),nBeat,1); max@1: // std_median_simmat = repmat(std(median_simmat),nBeat,1); max@1: // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat; max@1: // end max@1: // -------------------------------------------------------- max@1: max@1: // Retrieve Bar Bounderies max@1: arma::uvec dup = find(median_simmat > thresh_beat); max@1: arma::mat potential_duplicates = arma::zeros(nBeat,nBeat); max@1: potential_duplicates.elem(dup) = arma::ones(dup.size()); max@1: potential_duplicates = trimatu(potential_duplicates); max@1: max@1: unsigned nPartlengths = 
round((maxlength-minlength)/4)+1; max@1: arma::vec partlengths = zeros(nPartlengths); max@1: max@1: for (unsigned i = 0; i < nPartlengths; ++ i) max@1: partlengths(i) = (i*4)+ minlength; max@1: max@1: // initialise arrays max@1: arma::cube simArray = zeros(nBeat,nBeat,nPartlengths); max@1: arma::cube decisionArray2 = zeros(nBeat,nBeat,nPartlengths); max@1: max@1: int conta = 0; max@1: max@1: //for (unsigned iLength = 0; iLength < nPartlengths; ++ iLength) max@1: for (unsigned iLength = 0; iLength < 20; ++ iLength) max@1: { max@1: unsigned len = partlengths(iLength); max@1: unsigned nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song max@1: max@1: for (unsigned iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns) max@1: { max@1: arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1); max@1: max@1: for (unsigned i=0; i(nUsedBeat,nUsedBeat)%tempM); max@1: max@1: // convolution max@1: arma::vec K = arma::zeros(3); max@1: K << 0.01 << 0.98 << 0.01; max@1: max@1: max@1: for (unsigned i=0; i(simArray.n_rows, simArray.n_cols); max@1: temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1)); max@1: max@1: for (unsigned i=0; i(decisionArray2.n_rows,decisionArray2.n_cols); max@1: max@1: for (unsigned rows=0; rows 0) max@1: temp(rows,cols) = 1; max@1: max@1: arma::vec currLogicSum = arma::sum(temp,1); max@1: max@1: for (unsigned iBeat=0; iBeat 1) max@1: { max@1: arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat); max@1: double currSum = sum(t); max@1: max@1: unsigned count = 0; max@1: for (unsigned i=0; i0) max@1: count++; max@1: max@1: currSum = (currSum/count)/2; max@1: max@1: arma::rowvec t1; max@1: t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat); max@1: max@1: bestval = join_cols(bestval,t1); max@1: } max@1: } max@1: max@1: // Definition of the 
resulting vector max@1: vector parts; max@1: max@1: // make a table of all valid sets of parts max@1: max@1: char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'}; max@1: unsigned partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; max@1: arma::vec valid_sets = arma::ones(bestval.n_rows); max@1: max@1: if (!bestval.is_empty()) max@1: { max@1: max@1: // In questo punto viene introdotto un errore alla 3 cifra decimale max@1: max@1: arma::colvec t = arma::zeros(bestval.n_rows); max@1: for (unsigned i=0; i(bestval2.n_rows, bestval2.n_cols); max@1: for (unsigned i=0; i(bestIndices.size()); max@1: for (unsigned i=0; i0) max@1: bestIndicesMap(i) = 1; max@1: max@1: arma::rowvec mask = arma::zeros(bestLength*2-1); max@1: for (unsigned i=0; i 0); max@1: max@1: for (unsigned i=0; i(s*2-1); max@1: for (unsigned i=0; i(Ind.size()); max@1: for (unsigned i=0; i0) max@1: IndMap(i) = 2; max@1: max@1: arma::rowvec t3 = arma::conv(IndMap,mask1); max@6: arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2); max@1: arma::rowvec islandsdMult = currislands%island; max@6: max@1: arma::uvec islandsIndex = find(islandsdMult > 0); max@1: max@6: if (islandsIndex.size() > 0) max@1: valid_sets(iSet) = 0; max@1: } max@1: } max@1: } max@1: else max@1: { max@1: Part newPart; max@1: newPart.n = nBeat; max@1: newPart.indices.push_back(1); max@1: newPart.letter = 'A'; max@1: newPart.value = 1; max@1: newPart.level = 1; max@1: parts.push_back(newPart); max@1: } max@6: max@1: arma::vec bar = linspace(1,nBeat,nBeat); max@1: Part np = nullpart(parts,bar); max@7: max@1: parts.push_back(np); max@1: max@1: // -------------- NOT CONVERTED ------------------------------------- max@1: // if param.seg.editor max@1: // [pa, ta] = partarray(parts); max@1: // parts = editorssearch(pa, ta, parts); max@1: // parts = [parts, nullpart(parts,1:nBeat)]; max@1: // end max@1: // 
------------------------------------------------------------------ max@1: max@1: max@1: mergenulls(parts); max@1: max@1: max@1: // -------------- NOT CONVERTED ------------------------------------- max@1: // if param.seg.editor max@1: // [pa, ta] = partarray(parts); max@1: // parts = editorssearch(pa, ta, parts); max@1: // parts = [parts, nullpart(parts,1:nBeat)]; max@1: // end max@1: // ------------------------------------------------------------------ max@1: max@1: return parts; max@1: } max@1: max@1: max@1: max@1: void songSegmentChroma(Vamp::Plugin::FeatureList quatisedChromagram, vector &parts) max@1: { max@1: // Collect Info max@1: int nBeat = quatisedChromagram.size(); // Number of feature vector max@1: int nFeatValues = quatisedChromagram[0].values.size(); // Number of values for each feature vector max@1: max@1: arma::mat synchTreble = arma::zeros(nBeat,nFeatValues/2); max@1: max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: for (unsigned j = 0; j < nFeatValues/2; ++ j) max@1: { max@1: synchTreble(i,j) = quatisedChromagram[i].values[j]; max@1: } max@1: max@1: arma::mat synchBass = arma::zeros(nBeat,nFeatValues/2); max@1: max@1: for (unsigned i = 0; i < nBeat; ++ i) max@1: for (unsigned j = 0; j < nFeatValues/2; ++ j) max@1: { max@1: synchBass(i,j) = quatisedChromagram[i].values[j+12]; max@1: } max@1: max@1: // Process max@1: max@1: arma::mat segTreble = arma::zeros(quatisedChromagram.size(),quatisedChromagram[0].values.size()/2); max@1: arma::mat segBass = arma::zeros(quatisedChromagram.size(),quatisedChromagram[0].values.size()/2); max@1: max@1: for (unsigned iPart=0; iPart songSegmentIntegration(vector &parts) max@1: { max@1: // Break up parts (every part will have one instance) max@1: vector newPartVector; max@1: vector partindices; max@1: max@1: for (unsigned iPart=0; iPart parts; max@1: vector finalParts; max@1: max@1: parts = songSegment(quatisedChromagram); max@1: songSegmentChroma(quatisedChromagram,parts); max@7: max@1: finalParts = 
songSegmentIntegration(parts); max@1: max@1: max@1: // TEMP ---- max@6: /*for (unsigned i=0;i values; max@1: vector letters; max@1: max@1: for (unsigned iPart=0; iPart