Mercurial > hg > segmenter-vamp-plugin
diff segmentino/Segmentino.cpp @ 48:69251e11a913
Rename SongParts/songpartitioner to Segmentino throughout
author | Chris Cannam |
---|---|
date | Thu, 13 Jun 2013 09:43:01 +0100 |
parents | songparts/SongParts.cpp@f59ff6a22f8e |
children | 1ec0e2823891 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/segmentino/Segmentino.cpp Thu Jun 13 09:43:01 2013 +0100 @@ -0,0 +1,1930 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + Segmentino + + Code by Massimiliano Zanoni and Matthias Mauch + Centre for Digital Music, Queen Mary, University of London + + Copyright 2009-2013 Queen Mary, University of London. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. See the file + COPYING included with this distribution for more information. +*/ + +#include "Segmentino.h" + +#include <base/Window.h> +#include <dsp/onsets/DetectionFunction.h> +#include <dsp/onsets/PeakPicking.h> +#include <dsp/transforms/FFT.h> +#include <dsp/tempotracking/TempoTrackV2.h> +#include <dsp/tempotracking/DownBeat.h> +#include <chromamethods.h> +#include <maths/MathUtilities.h> +#include <boost/numeric/ublas/matrix.hpp> +#include <boost/numeric/ublas/io.hpp> +#include <boost/math/distributions/normal.hpp> +#include "armadillo" +#include <fstream> +#include <sstream> +#include <cmath> +#include <vector> + +#include <vamp-sdk/Plugin.h> + +using namespace boost::numeric; +using namespace arma; +using std::string; +using std::vector; +using std::cerr; +using std::cout; +using std::endl; + + +#ifndef __GNUC__ +#include <alloca.h> +#endif + + +// Result Struct +typedef struct Part { + int n; + vector<int> indices; + string letter; + int value; + int level; + int nInd; +}Part; + + + +/* ------------------------------------ */ +/* ----- BEAT DETECTOR CLASS ---------- */ +/* ------------------------------------ */ + +class BeatTrackerData +{ + /* --- ATTRIBUTES --- */ +public: + DFConfig dfConfig; + DetectionFunction *df; + DownBeat *downBeat; + vector<double> dfOutput; + Vamp::RealTime origin; + + + /* --- METHODS --- */ + + /* --- Constructor --- */ +public: + BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) { + + df = new DetectionFunction(config); + // decimation factor aims at resampling to c. 3KHz; must be power of 2 + int factor = MathUtilities::nextPowerOfTwo(rate / 3000); + // std::cerr << "BeatTrackerData: factor = " << factor << std::endl; + downBeat = new DownBeat(rate, factor, config.stepSize); + } + + /* --- Desctructor --- */ + ~BeatTrackerData() { + delete df; + delete downBeat; + } + + void reset() { + delete df; + df = new DetectionFunction(dfConfig); + dfOutput.clear(); + downBeat->resetAudioBuffer(); + origin = Vamp::RealTime::zeroTime; + } +}; + + +/* --------------------------------------- */ +/* ----- CHROMA EXTRACTOR CLASS ---------- */ +/* --------------------------------------- */ + +class ChromaData +{ + + /* --- ATTRIBUTES --- */ + +public: + int frameCount; + int nBPS; + Vamp::Plugin::FeatureList logSpectrum; + int blockSize; + int lengthOfNoteIndex; + vector<float> meanTunings; + vector<float> localTunings; + float whitening; + float preset; + float useNNLS; + vector<float> localTuning; + vector<float> kernelValue; + vector<int> kernelFftIndex; + vector<int> kernelNoteIndex; + float *dict; + bool tuneLocal; + float doNormalizeChroma; + float rollon; + float s; + vector<float> hw; + vector<float> sinvalues; + vector<float> cosvalues; + Window<float> window; + FFTReal fft; + int inputSampleRate; + + /* --- METHODS --- */ + + /* --- Constructor --- */ + +public: + ChromaData(float inputSampleRate, size_t block_size) : + frameCount(0), + nBPS(3), + logSpectrum(0), + blockSize(0), + lengthOfNoteIndex(0), + meanTunings(0), + localTunings(0), + whitening(1.0), + preset(0.0), + useNNLS(1.0), + localTuning(0.0), + kernelValue(0), + kernelFftIndex(0), + kernelNoteIndex(0), + dict(0), + tuneLocal(0.0), + doNormalizeChroma(0), + rollon(0.0), + s(0.7), + sinvalues(0), + cosvalues(0), + window(HanningWindow, block_size), + fft(block_size), + inputSampleRate(inputSampleRate) + { + // make the *note* dictionary matrix + dict = new float[nNote * 84]; + for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0; + blockSize = block_size; + } + + + /* --- Desctructor --- */ + + ~ChromaData() { + delete [] dict; + } + + /* --- Public Methods --- */ + + void reset() { + frameCount = 0; + logSpectrum.clear(); + for (int iBPS = 0; iBPS < 3; ++iBPS) { + meanTunings[iBPS] = 0; + localTunings[iBPS] = 0; + } + localTuning.clear(); + } + + void baseProcess(float *inputBuffers, Vamp::RealTime timestamp) + { + + frameCount++; + float *magnitude = new float[blockSize/2]; + double *fftReal = new double[blockSize]; + double *fftImag = new double[blockSize]; + + // FFTReal wants doubles, so we need to make a local copy of inputBuffers + double *inputBuffersDouble = new double[blockSize]; + for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i]; + + fft.process(false, inputBuffersDouble, fftReal, fftImag); + + float energysum = 0; + // make magnitude + float maxmag = -10000; + for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) { + magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] + + fftImag[iBin] * fftImag[iBin]); + if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize; + // a valid audio signal (between -1 and 1) should not be limited here. + if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; + if (rollon > 0) { + energysum += pow(magnitude[iBin],2); + } + } + + float cumenergy = 0; + if (rollon > 0) { + for (int iBin = 2; iBin < static_cast<int>(blockSize/2); iBin++) { + cumenergy += pow(magnitude[iBin],2); + if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0; + else break; + } + } + + if (maxmag < 2) { + // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; + for (int iBin = 0; iBin < static_cast<int>(blockSize/2); iBin++) { + magnitude[iBin] = 0; + } + } + + // cerr << magnitude[200] << endl; + + // note magnitude mapping using pre-calculated matrix + float *nm = new float[nNote]; // note magnitude + for (int iNote = 0; iNote < nNote; iNote++) { + nm[iNote] = 0; // initialise as 0 + } + int binCount = 0; + for (vector<float>::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) { + nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount]; + binCount++; + } + + float one_over_N = 1.0/frameCount; + // update means of complex tuning variables + for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N; + + for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { + for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; + float ratioOld = 0.997; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + localTunings[iBPS] *= ratioOld; + localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); + } + } + + float localTuningImag = 0; + float localTuningReal = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + localTuningReal += localTunings[iBPS] * cosvalues[iBPS]; + localTuningImag += localTunings[iBPS] * sinvalues[iBPS]; + } + + float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); + localTuning.push_back(normalisedtuning); + + Vamp::Plugin::Feature f1; // logfreqspec + f1.hasTimestamp = true; + f1.timestamp = timestamp; + for (int iNote = 0; iNote < nNote; iNote++) { + f1.values.push_back(nm[iNote]); + } + + // deletes + delete[] inputBuffersDouble; + delete[] magnitude; + delete[] fftReal; + delete[] fftImag; + delete[] nm; + + logSpectrum.push_back(f1); // remember note magnitude + } + + bool initialise() + { + dictionaryMatrix(dict, s); + + // make things for tuning estimation + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); + cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); + } + + + // make hamming window of length 1/2 octave + int hamwinlength = nBPS * 6 + 1; + float hamwinsum = 0; + for (int i = 0; i < hamwinlength; ++i) { + hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1))); + hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); + } + for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; + + + // initialise the tuning + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + meanTunings.push_back(0); + localTunings.push_back(0); + } + + blockSize = blockSize; + frameCount = 0; + int tempn = nNote * blockSize/2; + // cerr << "length of tempkernel : " << tempn << endl; + float *tempkernel; + + tempkernel = new float[tempn]; + + logFreqMatrix(inputSampleRate, blockSize, tempkernel); + kernelValue.clear(); + kernelFftIndex.clear(); + kernelNoteIndex.clear(); + int countNonzero = 0; + for (int iNote = 0; iNote < nNote; ++iNote) { + // I don't know if this is wise: manually making a sparse matrix + for (int iFFT = 0; iFFT < static_cast<int>(blockSize/2); ++iFFT) { + if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { + kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); + if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { + countNonzero++; + } + kernelFftIndex.push_back(iFFT); + kernelNoteIndex.push_back(iNote); + } + } + } + delete [] tempkernel; + + return true; + } +}; + + +/* --------------------------------- */ +/* ----- SONG PARTITIONER ---------- */ +/* --------------------------------- */ + + +/* --- ATTRIBUTES --- */ + +float Segmentino::m_stepSecs = 0.01161; // 512 samples at 44100 +int Segmentino::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's +int Segmentino::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's + + +/* --- METHODS --- */ + +/* --- Constructor --- */ +Segmentino::Segmentino(float inputSampleRate) : + Vamp::Plugin(inputSampleRate), + m_d(0), + m_chromadata(0), + m_bpb(4), + m_pluginFrameCount(0) +{ +} + + +/* --- Desctructor --- */ +Segmentino::~Segmentino() +{ + delete m_d; + delete m_chromadata; +} + + +/* --- Methods --- */ +string Segmentino::getIdentifier() const +{ + return "qm-songpartitioner"; +} + +string Segmentino::getName() const +{ + return "Song Partitioner"; +} + +string Segmentino::getDescription() const +{ + return "Estimate contiguous segments pertaining to song parts such as verse and chorus."; +} + +string Segmentino::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int Segmentino::getPluginVersion() const +{ + return 2; +} + +string Segmentino::getCopyright() const +{ + return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2012 QMUL - All Rights Reserved"; +} + +Segmentino::ParameterList Segmentino::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + + // desc.identifier = "bpb"; + // desc.name = "Beats per Bar"; + // desc.description = "The number of beats in each bar"; + // desc.minValue = 2; + // desc.maxValue = 16; + // desc.defaultValue = 4; + // desc.isQuantized = true; + // desc.quantizeStep = 1; + // list.push_back(desc); + + return list; +} + +float Segmentino::getParameter(std::string name) const +{ + if (name == "bpb") return m_bpb; + return 0.0; +} + +void Segmentino::setParameter(std::string name, float value) +{ + if (name == "bpb") m_bpb = lrintf(value); +} + + +// Return the StepSize for Chroma Extractor +size_t Segmentino::getPreferredStepSize() const +{ + size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); + if (step < 1) step = 1; + + return step; +} + +// Return the BlockSize for Chroma Extractor +size_t Segmentino::getPreferredBlockSize() const +{ + size_t theoretical = getPreferredStepSize() * 2; + theoretical *= m_chromaFramesizeFactor; + + return theoretical; +} + + +// Initialize the plugin and define Beat Tracker and Chroma Extractor Objects +bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (m_d) { + delete m_d; + m_d = 0; + } + if (m_chromadata) { + delete m_chromadata; + m_chromadata = 0; + } + + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) { + std::cerr << "Segmentino::initialise: Unsupported channel count: " + << channels << std::endl; + return false; + } + + if (stepSize != getPreferredStepSize()) { + std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: " + << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl; + return false; + } + + if (blockSize != getPreferredBlockSize()) { + std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: " + << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl; + } + + // Beat tracker and Chroma extractor has two different configuration parameters + + // Configuration Parameters for Beat Tracker + DFConfig dfConfig; + dfConfig.DFType = DF_COMPLEXSD; + dfConfig.stepSize = stepSize; + dfConfig.frameLength = blockSize / m_chromaFramesizeFactor; + dfConfig.dbRise = 3; + dfConfig.adaptiveWhitening = false; + dfConfig.whiteningRelaxCoeff = -1; + dfConfig.whiteningFloor = -1; + + // Initialise Beat Tracker + m_d = new BeatTrackerData(m_inputSampleRate, dfConfig); + m_d->downBeat->setBeatsPerBar(m_bpb); + + // Initialise Chroma Extractor + m_chromadata = new ChromaData(m_inputSampleRate, blockSize); + m_chromadata->initialise(); + + return true; +} + +void Segmentino::reset() +{ + if (m_d) m_d->reset(); + if (m_chromadata) m_chromadata->reset(); + m_pluginFrameCount = 0; +} + +Segmentino::OutputList Segmentino::getOutputDescriptors() const +{ + OutputList list; + int outputCounter = 0; + + OutputDescriptor beat; + beat.identifier = "beats"; + beat.name = "Beats"; + beat.description = "Beat locations labelled with metrical position"; + beat.unit = ""; + beat.hasFixedBinCount = true; + beat.binCount = 0; + beat.sampleType = OutputDescriptor::VariableSampleRate; + beat.sampleRate = 1.0 / m_stepSecs; + m_beatOutputNumber = outputCounter++; + + OutputDescriptor bars; + bars.identifier = "bars"; + bars.name = "Bars"; + bars.description = "Bar locations"; + bars.unit = ""; + bars.hasFixedBinCount = true; + bars.binCount = 0; + bars.sampleType = OutputDescriptor::VariableSampleRate; + bars.sampleRate = 1.0 / m_stepSecs; + m_barsOutputNumber = outputCounter++; + + OutputDescriptor beatcounts; + beatcounts.identifier = "beatcounts"; + beatcounts.name = "Beat Count"; + beatcounts.description = "Beat counter function"; + beatcounts.unit = ""; + beatcounts.hasFixedBinCount = true; + beatcounts.binCount = 1; + beatcounts.sampleType = OutputDescriptor::VariableSampleRate; + beatcounts.sampleRate = 1.0 / m_stepSecs; + m_beatcountsOutputNumber = outputCounter++; + + OutputDescriptor beatsd; + beatsd.identifier = "beatsd"; + beatsd.name = "Beat Spectral Difference"; + beatsd.description = "Beat spectral difference function used for bar-line detection"; + beatsd.unit = ""; + beatsd.hasFixedBinCount = true; + beatsd.binCount = 1; + beatsd.sampleType = OutputDescriptor::VariableSampleRate; + beatsd.sampleRate = 1.0 / m_stepSecs; + m_beatsdOutputNumber = outputCounter++; + + OutputDescriptor logscalespec; + logscalespec.identifier = "logscalespec"; + logscalespec.name = "Log-Frequency Spectrum"; + logscalespec.description = "Spectrum with linear frequency on a log scale."; + logscalespec.unit = ""; + logscalespec.hasFixedBinCount = true; + logscalespec.binCount = nNote; + logscalespec.hasKnownExtents = false; + logscalespec.isQuantized = false; + logscalespec.sampleType = OutputDescriptor::FixedSampleRate; + logscalespec.hasDuration = false; + logscalespec.sampleRate = m_inputSampleRate/2048; + m_logscalespecOutputNumber = outputCounter++; + + OutputDescriptor bothchroma; + bothchroma.identifier = "bothchroma"; + bothchroma.name = "Chromagram and Bass Chromagram"; + bothchroma.description = "Tuning-adjusted chromagram and bass chromagram (stacked on top of each other) from NNLS approximate transcription."; + bothchroma.unit = ""; + bothchroma.hasFixedBinCount = true; + bothchroma.binCount = 24; + bothchroma.hasKnownExtents = false; + bothchroma.isQuantized = false; + bothchroma.sampleType = OutputDescriptor::FixedSampleRate; + bothchroma.hasDuration = false; + bothchroma.sampleRate = m_inputSampleRate/2048; + m_bothchromaOutputNumber = outputCounter++; + + OutputDescriptor qchromafw; + qchromafw.identifier = "qchromafw"; + qchromafw.name = "Pseudo-Quantised Chromagram and Bass Chromagram"; + qchromafw.description = "Pseudo-Quantised Chromagram and Bass Chromagram (frames between two beats are identical)."; + qchromafw.unit = ""; + qchromafw.hasFixedBinCount = true; + qchromafw.binCount = 24; + qchromafw.hasKnownExtents = false; + qchromafw.isQuantized = false; + qchromafw.sampleType = OutputDescriptor::FixedSampleRate; + qchromafw.hasDuration = false; + qchromafw.sampleRate = m_inputSampleRate/2048; + m_qchromafwOutputNumber = outputCounter++; + + OutputDescriptor qchroma; + qchroma.identifier = "qchroma"; + qchroma.name = "Quantised Chromagram and Bass Chromagram"; + qchroma.description = "Quantised Chromagram and Bass Chromagram."; + qchroma.unit = ""; + qchroma.hasFixedBinCount = true; + qchroma.binCount = 24; + qchroma.hasKnownExtents = false; + qchroma.isQuantized = false; + qchroma.sampleType = OutputDescriptor::FixedSampleRate; + qchroma.hasDuration = true; + qchroma.sampleRate = m_inputSampleRate/2048; + m_qchromaOutputNumber = outputCounter++; + + OutputDescriptor segm; + segm.identifier = "segmentation"; + segm.name = "Segmentation"; + segm.description = "Segmentation"; + segm.unit = "segment-type"; + segm.hasFixedBinCount = true; + //segm.binCount = 24; + segm.binCount = 1; + segm.hasKnownExtents = true; + segm.minValue = 1; + segm.maxValue = 5; + segm.isQuantized = true; + segm.quantizeStep = 1; + segm.sampleType = OutputDescriptor::VariableSampleRate; + segm.sampleRate = 1.0 / m_stepSecs; + segm.hasDuration = true; + m_segmOutputNumber = outputCounter++; + + + /* + OutputList list; + OutputDescriptor segmentation; + segmentation.identifier = "segmentation"; + segmentation.name = "Segmentation"; + segmentation.description = "Segmentation"; + segmentation.unit = "segment-type"; + segmentation.hasFixedBinCount = true; + segmentation.binCount = 1; + segmentation.hasKnownExtents = true; + segmentation.minValue = 1; + segmentation.maxValue = nSegmentTypes; + segmentation.isQuantized = true; + segmentation.quantizeStep = 1; + segmentation.sampleType = OutputDescriptor::VariableSampleRate; + segmentation.sampleRate = m_inputSampleRate / getPreferredStepSize(); + list.push_back(segmentation); + return list; + */ + + + list.push_back(beat); + list.push_back(bars); + list.push_back(beatcounts); + list.push_back(beatsd); + list.push_back(logscalespec); + list.push_back(bothchroma); + list.push_back(qchromafw); + list.push_back(qchroma); + list.push_back(segm); + + return list; +} + +// Executed for each frame - called from the host + +// We use time domain input, because DownBeat requires it -- so we +// use the time-domain version of DetectionFunction::process which +// does its own FFT. It requires doubles as input, so we need to +// make a temporary copy + +// We only support a single input channel +Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers,Vamp::RealTime timestamp) +{ + if (!m_d) { + cerr << "ERROR: Segmentino::process: " + << "Segmentino has not been initialised" + << endl; + return FeatureSet(); + } + + const int fl = m_d->dfConfig.frameLength; +#ifndef __GNUC__ + double *dfinput = (double *)alloca(fl * sizeof(double)); +#else + double dfinput[fl]; +#endif + int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2; + + // Since chroma needs a much longer frame size, we only ever use the very + // beginning of the frame for beat tracking. + for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i]; + double output = m_d->df->process(dfinput); + + if (m_d->dfOutput.empty()) m_d->origin = timestamp; + +// std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl; + m_d->dfOutput.push_back(output); + + // Downsample and store the incoming audio block. + // We have an overlap on the incoming audio stream (step size is + // half block size) -- this function is configured to take only a + // step size's worth, so effectively ignoring the overlap. Note + // however that this means we omit the last blocksize - stepsize + // samples completely for the purposes of barline detection + // (hopefully not a problem) + m_d->downBeat->pushAudioBlock(inputBuffers[0]); + + // The following is not done every time, but only every m_chromaFramesizeFactor times, + // because the chroma does not need dense time frames. + + if (m_pluginFrameCount % m_chromaStepsizeFactor == 0) + { + + // Window the full time domain, data, FFT it and process chroma stuff. + + #ifndef __GNUC__ + float *windowedBuffers = (float *)alloca(m_chromadata->blockSize * sizeof(float)); + #else + float windowedBuffers[m_chromadata->blockSize]; + #endif + m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]); + + // adjust timestamp (we want the middle of the frame) + timestamp = timestamp + Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate)); + + m_chromadata->baseProcess(&windowedBuffers[0], timestamp); + + } + m_pluginFrameCount++; + + FeatureSet fs; + fs[m_logscalespecOutputNumber].push_back( + m_chromadata->logSpectrum.back()); + return fs; +} + +Segmentino::FeatureSet Segmentino::getRemainingFeatures() +{ + if (!m_d) { + cerr << "ERROR: Segmentino::getRemainingFeatures: " + << "Segmentino has not been initialised" + << endl; + return FeatureSet(); + } + + FeatureSet masterFeatureset = beatTrack(); + Vamp::RealTime last_beattime = masterFeatureset[m_beatOutputNumber][masterFeatureset[m_beatOutputNumber].size()-1].timestamp; + masterFeatureset[m_beatOutputNumber].clear(); + Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0); + while (beattime < last_beattime) + { + Feature beatfeature; + beatfeature.hasTimestamp = true; + beatfeature.timestamp = beattime; + masterFeatureset[m_beatOutputNumber].push_back(beatfeature); + beattime = beattime + Vamp::RealTime::fromSeconds(0.5); + } + + + FeatureList chromaList = chromaFeatures(); + + for (int i = 0; i < (int)chromaList.size(); ++i) + { + masterFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]); + } + + // quantised and pseudo-quantised (beat-wise) chroma + std::vector<FeatureList> quantisedChroma = beatQuantiser(chromaList, masterFeatureset[m_beatOutputNumber]); + + if (quantisedChroma.empty()) return masterFeatureset; + + masterFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0]; + masterFeatureset[m_qchromaOutputNumber] = quantisedChroma[1]; + + // Segmentation + try { + masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]); + } catch (std::bad_alloc &a) { + cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl; + } + + return(masterFeatureset); +} + +/* ------ Beat Tracker ------ */ + +Segmentino::FeatureSet Segmentino::beatTrack() +{ + vector<double> df; + vector<double> beatPeriod; + vector<double> tempi; + + for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts + df.push_back(m_d->dfOutput[i]); + beatPeriod.push_back(0.0); + } + if (df.empty()) return FeatureSet(); + + TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize); + tt.calculateBeatPeriod(df, beatPeriod, tempi); + + vector<double> beats; + tt.calculateBeats(df, beatPeriod, beats); + + vector<int> downbeats; + size_t downLength = 0; + const float *downsampled = m_d->downBeat->getBufferedAudio(downLength); + m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats); + + vector<double> beatsd; + m_d->downBeat->getBeatSD(beatsd); + + /*std::cout << "BeatTracker: found downbeats at: "; + for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/ + + FeatureSet returnFeatures; + + char label[20]; + + int dbi = 0; + int beat = 0; + int bar = 0; + + if (!downbeats.empty()) { + // get the right number for the first beat; this will be + // incremented before use (at top of the following loop) + int firstDown = downbeats[0]; + beat = m_bpb - firstDown - 1; + if (beat == m_bpb) beat = 0; + } + + for (int i = 0; i < (int)beats.size(); ++i) { + + int frame = beats[i] * m_d->dfConfig.stepSize; + + if (dbi < (int)downbeats.size() && i == downbeats[dbi]) { + beat = 0; + ++bar; + ++dbi; + } else { + ++beat; + } + + /* Ooutput Section */ + + // outputs are: + // + // 0 -> beats + // 1 -> bars + // 2 -> beat counter function + + Feature feature; + feature.hasTimestamp = true; + feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate)); + + sprintf(label, "%d", beat + 1); + feature.label = label; + returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats + + feature.values.push_back(beat + 1); + returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function + + if (i > 0 && i <= (int)beatsd.size()) { + feature.values.clear(); + feature.values.push_back(beatsd[i-1]); + feature.label = ""; + returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference + } + + if (beat == 0) { + feature.values.clear(); + sprintf(label, "%d", bar); + feature.label = label; + returnFeatures[m_barsOutputNumber].push_back(feature); // bars + } + } + + return returnFeatures; +} + + +/* ------ Chroma Extractor ------ */ + +Segmentino::FeatureList Segmentino::chromaFeatures() +{ + + FeatureList returnFeatureList; + FeatureList tunedlogfreqspec; + + if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList; + + /** Calculate Tuning + calculate tuning from (using the angle of the complex number defined by the + cumulative mean real and imag values) + **/ + float meanTuningImag = 0; + float meanTuningReal = 0; + for (int iBPS = 0; iBPS < nBPS; ++iBPS) { + meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS]; + meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS]; + } + float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); + float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); + int intShift = floor(normalisedtuning * 3); + float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this + + char buffer0 [50]; + + sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); + + /** Tune Log-Frequency Spectrogram + calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to + perform linear interpolation on the existing log-frequency spectrogram (kinda f1). + **/ + cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; + + float tempValue = 0; + + int count = 0; + + for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i) + { + + Feature f1 = *i; + Feature f2; // tuned log-frequency spectrum + + f2.hasTimestamp = true; + f2.timestamp = f1.timestamp; + + f2.values.push_back(0.0); + f2.values.push_back(0.0); // set lower edge to zero + + if (m_chromadata->tuneLocal) { + intShift = floor(m_chromadata->localTuning[count] * 3); + floatShift = m_chromadata->localTuning[count] * 3 - intShift; + // floatShift is a really bad name for this + } + + for (int k = 2; k < (int)f1.values.size() - 3; ++k) + { // interpolate all inner bins + tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; + f2.values.push_back(tempValue); + } + + f2.values.push_back(0.0); + f2.values.push_back(0.0); + f2.values.push_back(0.0); // upper edge + + vector<float> runningmean = SpecialConvolution(f2.values,m_chromadata->hw); + vector<float> runningstd; + for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) + runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); + } + runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve + for (int i = 0; i < nNote; i++) + { + + runningstd[i] = sqrt(runningstd[i]); + // square root to finally have running std + + if (runningstd[i] > 0) + { + f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? + (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0; + } + + if (f2.values[i] < 0) { + + cerr << "ERROR: negative value in logfreq spectrum" << endl; + + } + } + tunedlogfreqspec.push_back(f2); + count++; + } + cerr << "done." << endl; + /** Semitone spectrum and chromagrams + Semitone-spaced log-frequency spectrum derived + from the tuned log-freq spectrum above. the spectrum + is inferred using a non-negative least squares algorithm. + Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means + bass and treble stacked onto each other). + **/ + if (m_chromadata->useNNLS == 0) { + cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; + } else { + cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; + } + + vector<float> oldchroma = vector<float>(12,0); + vector<float> oldbasschroma = vector<float>(12,0); + count = 0; + + for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) { + Feature logfreqsp = *it; // logfreq spectrum + Feature bothchroma; // treble and bass chromagram + + bothchroma.hasTimestamp = true; + bothchroma.timestamp = logfreqsp.timestamp; + + float b[nNote]; + + bool some_b_greater_zero = false; + float sumb = 0; + for (int i = 0; i < nNote; i++) { + b[i] = logfreqsp.values[i]; + sumb += b[i]; + if (b[i] > 0) { + some_b_greater_zero = true; + } + } + + // here's where the non-negative least squares algorithm calculates the note activation x + + vector<float> chroma = vector<float>(12, 0); + vector<float> basschroma = vector<float>(12, 0); + float currval; + int iSemitone = 0; + + if (some_b_greater_zero) { + if (m_chromadata->useNNLS == 0) { + for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { + currval = 0; + for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { + currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); + } + chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; + basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; + iSemitone++; + } + + } else { + float x[84+1000]; + for (int i = 1; i < 1084; ++i) x[i] = 1.0; + vector<int> signifIndex; + int index=0; + sumb /= 84.0; + for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { + float currval = 0; + for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { + currval += b[iNote + iBPS]; + } + if (currval > 0) signifIndex.push_back(index); + index++; + } + float rnorm; + float w[84+1000]; + float zz[84+1000]; + int indx[84+1000]; + int mode; + int dictsize = nNote*signifIndex.size(); + + float *curr_dict = new float[dictsize]; + for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { + for (int iBin = 0; iBin < nNote; iBin++) { + curr_dict[iNote * nNote + iBin] = + 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin]; + } + } + nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); + delete [] curr_dict; + for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { + // cerr << mode << endl; + chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; + basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; + } + } + } + + chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); + // just stack the both chromas + + bothchroma.values = chroma; + returnFeatureList.push_back(bothchroma); + count++; + } + cerr << "done." << endl; + + return returnFeatureList; +} + +/* ------ Beat Quantizer ------ */ + +std::vector<Vamp::Plugin::FeatureList> +Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats) +{ + std::vector<FeatureList> returnVector; + + FeatureList fwQchromagram; // frame-wise beat-quantised chroma + FeatureList bwQchromagram; // beat-wise beat-quantised chroma + + + size_t nChromaFrame = chromagram.size(); + size_t nBeat = beats.size(); + + if (nBeat == 0 && nChromaFrame == 0) return returnVector; + + int nBin = chromagram[0].values.size(); + + vector<float> tempChroma = vector<float>(nBin); + + Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime; + int currBeatCount = -1; // start before first beat + int framesInBeat = 0; + + for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma) + { + Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp; + Vamp::RealTime newBeatTimestamp; + + if (currBeatCount != (int)beats.size() - 1) { + newBeatTimestamp = beats[currBeatCount+1].timestamp; + } else { + newBeatTimestamp = chromagram[nChromaFrame-1].timestamp; + } + + if (frameTimestamp > newBeatTimestamp || + iChroma == nChromaFrame-1) + { + // new beat (or last chroma frame) + // 1. finish all the old beat processing + if (framesInBeat > 0) + { + for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average + } + + Feature bwQchromaFrame; + bwQchromaFrame.hasTimestamp = true; + bwQchromaFrame.timestamp = beatTimestamp; + bwQchromaFrame.values = tempChroma; + bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp; + bwQchromagram.push_back(bwQchromaFrame); + + for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame) + { + Feature fwQchromaFrame; + fwQchromaFrame.hasTimestamp = true; + fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp; + fwQchromaFrame.values = tempChroma; // all between two beats get the same + fwQchromagram.push_back(fwQchromaFrame); + } + + // 2. increments / resets for current (new) beat + currBeatCount++; + beatTimestamp = newBeatTimestamp; + for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average + framesInBeat = 0; + } + framesInBeat++; + for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i]; + } + returnVector.push_back(fwQchromagram); + returnVector.push_back(bwQchromagram); + return returnVector; +} + + + +/* -------------------------------- */ +/* ------ Support Functions ------ */ +/* -------------------------------- */ + +// one-dimesion median filter +arma::vec medfilt1(arma::vec v, int medfilt_length) +{ + // TODO: check if this works with odd and even medfilt_length !!! + int halfWin = medfilt_length/2; + + // result vector + arma::vec res = arma::zeros<arma::vec>(v.size()); + + // padding + arma::vec padV = arma::zeros<arma::vec>(v.size()+medfilt_length-1); + + for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i) + { + padV(i) = v(i-medfilt_length/2); + } + + // the above loop leaves the boundaries at 0, + // the two loops below fill them with the start or end values of v at start and end + for (int i = 0; i < halfWin; ++i) padV(i) = v(0); + for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1); + + + + // Median filter + arma::vec win = arma::zeros<arma::vec>(medfilt_length); + + for (int i=0; i < (int)v.size(); ++i) + { + win = padV.subvec(i,i+halfWin*2); + win = sort(win); + res(i) = win(halfWin); + } + + return res; +} + + +// Quantile +double quantile(arma::vec v, double p) +{ + arma::vec sortV = arma::sort(v); + int n = sortV.size(); + arma::vec x = arma::zeros<vec>(n+2); + arma::vec y = arma::zeros<vec>(n+2); + + x(0) = 0; + x(n+1) = 100; + + for (int i=1; i<n+1; ++i) + x(i) = 100*(0.5+(i-1))/n; + + y(0) = sortV(0); + y.subvec(1,n) = sortV; + y(n+1) = sortV(n-1); + + arma::uvec x2index = find(x>=p*100); + + // Interpolation + double x1 = x(x2index(0)-1); + double x2 = x(x2index(0)); + double y1 = y(x2index(0)-1); + double y2 = y(x2index(0)); + + double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1; + + return res; +} + +// Max Filtering +arma::mat maxfilt1(arma::mat inmat, int len) +{ + arma::mat outmat = inmat; + + for (int i=0; i < (int)inmat.n_rows; ++i) + { + if (arma::sum(inmat.row(i)) > 0) + { + // Take a window of rows + int startWin; + int endWin; + + if (0 > i-len) + startWin = 0; + else + startWin = i-len; + + if ((int)inmat.n_rows-1 < i+len-1) + endWin = inmat.n_rows-1; + else + endWin = i+len-1; + + outmat(i,span::all) = arma::max(inmat(span(startWin,endWin),span::all)); + } + } + + return outmat; + +} + +// Null Parts +Part nullpart(vector<Part> parts, arma::vec barline) +{ + arma::uvec nullindices = arma::ones<arma::uvec>(barline.size()); + for (int iPart=0; iPart<(int)parts.size(); ++iPart) + { + //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex) + for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex) + for (int i=0; i<parts[iPart].n; ++i) + { + int ind = parts[iPart].indices[iIndex]+i; + nullindices(ind) = 0; + } + } + + Part newPart; + newPart.n = 1; + uvec q = find(nullindices > 0); + + for (int i=0; i<(int)q.size();++i) + newPart.indices.push_back(q(i)); + + newPart.letter = '-'; + newPart.value = 0; + newPart.level = 0; + + return newPart; +} + + +// Merge Nulls +void mergenulls(vector<Part> &parts) +{ + for (int iPart=0; iPart<(int)parts.size(); ++iPart) + { + + vector<Part> newVectorPart; + + if (parts[iPart].letter.compare("-")==0) + { + sort (parts[iPart].indices.begin(), parts[iPart].indices.end()); + int newpartind = -1; + + vector<int> indices; + indices.push_back(-2); + + for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) + indices.push_back(parts[iPart].indices[iIndex]); + + for (int iInd=1; iInd < (int)indices.size(); ++iInd) + { + if (indices[iInd] - indices[iInd-1] > 1) + { + newpartind++; + + Part newPart; + newPart.letter = 'N'; + std::stringstream out; + out << newpartind+1; + newPart.letter.append(out.str()); + // newPart.value = 20+newpartind+1; + newPart.value = 0; + newPart.n = 1; + newPart.indices.push_back(indices[iInd]); + newPart.level = 0; + + newVectorPart.push_back(newPart); + } + else + { + newVectorPart[newpartind].n = newVectorPart[newpartind].n+1; + } + } + parts.erase (parts.end()); + + for (int i=0; i<(int)newVectorPart.size(); ++i) + parts.push_back(newVectorPart[i]); + } + } +} + +/* ------ Segmentation ------ */ + +vector<Part> songSegment(Vamp::Plugin::FeatureList quantisedChromagram) +{ + + + /* ------ Parameters ------ */ + double thresh_beat = 0.85; + double thresh_seg = 0.80; + int medfilt_length = 5; + int minlength = 28; + int maxlength = 2*128; + double quantilePerc = 0.1; + /* ------------------------ */ + + + // Collect Info + int nBeat = quantisedChromagram.size(); // Number of feature vector + int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector + + if (nBeat < minlength) { + // return a single part + vector<Part> parts; + Part newPart; + newPart.n = 1; + newPart.indices.push_back(0); + newPart.letter = "n1"; + newPart.value = 20; + newPart.level = 0; + parts.push_back(newPart); + return parts; + } + + arma::irowvec timeStamp = arma::zeros<arma::imat>(1,nBeat); // Vector of Time Stamps + + // Save time stamp as a Vector + if (quantisedChromagram[0].hasTimestamp) + { + for (int i = 0; i < nBeat; ++ i) + timeStamp[i] = quantisedChromagram[i].timestamp.nsec; + } + + + // Build a ObservationTOFeatures Matrix + arma::mat featVal = arma::zeros<mat>(nBeat,nFeatValues/2); + + for (int i = 0; i < nBeat; ++ i) + for (int j = 0; j < nFeatValues/2; ++ j) + { + featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated + } + + // Set to arbitrary value to feature vectors with low std + arma::mat a = stddev(featVal,1,1); + + // Feature Correlation Matrix + arma::mat simmat0 = 1-arma::cor(arma::trans(featVal)); + + + for (int i = 0; i < nBeat; ++ i) + { + if (a(i)<0.000001) + { + featVal(i,1) = 1000; // arbitrary + + for (int j = 0; j < nFeatValues/2; ++j) + { + simmat0(i,j) = 1; + simmat0(j,i) = 1; + } + } + } + + arma::mat simmat = 1-simmat0/2; + + // -------- To delate when the proble with the add of beat will be solved ------- + for (int i = 0; i < nBeat; ++ i) + for (int j = 0; j < nBeat; ++ j) + if (!std::isfinite(simmat(i,j))) + simmat(i,j)=0; + // ------------------------------------------------------------------------------ + + // Median Filtering applied to the Correlation Matrix + // The median filter is for each diagonal of the Matrix + arma::mat median_simmat = arma::zeros<arma::mat>(nBeat,nBeat); + + for (int i = 0; i < nBeat; ++ i) + { + arma::vec temp = medfilt1(simmat.diag(i),medfilt_length); + median_simmat.diag(i) = temp; + median_simmat.diag(-i) = temp; + } + + for (int i = 0; i < nBeat; ++ i) + for (int j = 0; j < nBeat; ++ j) + if (!std::isfinite(median_simmat(i,j))) + median_simmat(i,j) = 0; + + // -------------- NOT CONVERTED ------------------------------------- + // if param.seg.standardise + // med_median_simmat = repmat(median(median_simmat),nBeat,1); + // std_median_simmat = repmat(std(median_simmat),nBeat,1); + // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat; + // end + // -------------------------------------------------------- + + // Retrieve Bar Bounderies + arma::uvec dup = find(median_simmat > thresh_beat); + arma::mat potential_duplicates = arma::zeros<arma::mat>(nBeat,nBeat); + potential_duplicates.elem(dup) = arma::ones<arma::vec>(dup.size()); + potential_duplicates = trimatu(potential_duplicates); + + int nPartlengths = round((maxlength-minlength)/4)+1; + arma::vec partlengths = zeros<arma::vec>(nPartlengths); + + for (int i = 0; i < nPartlengths; ++ i) + partlengths(i) = (i*4) + minlength; + + // initialise arrays + arma::cube simArray = zeros<arma::cube>(nBeat,nBeat,nPartlengths); + arma::cube decisionArray2 = zeros<arma::cube>(nBeat,nBeat,nPartlengths); + + for (int iLength = 0; iLength < nPartlengths; ++ iLength) + // for (int iLength = 0; iLength < 20; ++ iLength) + { + int len = partlengths(iLength); + int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song + + if (nUsedBeat < 1) continue; + + for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns) + { + arma::uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1); + + for (int i=0; i < (int)help2.size(); ++i) + { + + // measure how well two length len segments go together + int kBeat = help2(i); + arma::vec distrib = median_simmat(span(iBeat,iBeat+len-1),span(kBeat,kBeat+len-1)).diag(0); + simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc); + } + } + + arma::mat tempM = simArray(span(0,nUsedBeat-1),span(0,nUsedBeat-1),span(iLength,iLength)); + simArray.slice(iLength)(span(0,nUsedBeat-1),span(0,nUsedBeat-1)) = tempM + arma::trans(tempM) - (eye<mat>(nUsedBeat,nUsedBeat)%tempM); + + // convolution + arma::vec K = arma::zeros<vec>(3); + K << 0.01 << 0.98 << 0.01; + + + for (int i=0; i < (int)simArray.n_rows; ++i) + { + arma::rowvec t = arma::conv((arma::rowvec)simArray.slice(iLength).row(i),K); + simArray.slice(iLength)(i,span::all) = t.subvec(1,t.size()-2); + } + + // take only over-average bars that do not overlap + + arma::mat temp = arma::zeros<mat>(simArray.n_rows, simArray.n_cols); + temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all,span(0,nUsedBeat-1)); + + for (int i=0; i < (int)temp.n_rows; ++i) + for (int j=0; j < nUsedBeat; ++j) + if (temp(i,j) < thresh_seg) + temp(i,j) = 0; + + decisionArray2.slice(iLength) = temp; + + arma::mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1); + + for (int i=0; i < (int)decisionArray2.n_rows; ++i) + for (int j=0; j < (int)decisionArray2.n_cols; ++j) + if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j)) + decisionArray2.slice(iLength)(i,j) = 0; + + decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % arma::trans(decisionArray2.slice(iLength)); + + for (int i=0; i < (int)simArray.n_rows; ++i) + for (int j=0; j < (int)simArray.n_cols; ++j) + if (simArray.slice(iLength)(i,j) < thresh_seg) + potential_duplicates(i,j) = 0; + } + + // Milk the data + + arma::mat bestval; + + for (int iLength=0; iLength<nPartlengths; ++iLength) + { + arma::mat temp = arma::zeros<arma::mat>(decisionArray2.n_rows,decisionArray2.n_cols); + + for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows) + for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols) + if (decisionArray2.slice(iLength)(rows,cols) > 0) + temp(rows,cols) = 1; + + arma::vec currLogicSum = arma::sum(temp,1); + + for (int iBeat=0; iBeat < nBeat; ++iBeat) + if (currLogicSum(iBeat) > 1) + { + arma::vec t = decisionArray2.slice(iLength)(span::all,iBeat); + double currSum = sum(t); + + int count = 0; + for (int i=0; i < (int)t.size(); ++i) + if (t(i)>0) + count++; + + currSum = (currSum/count)/2; + + arma::rowvec t1; + t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat); + + bestval = join_cols(bestval,t1); + } + } + + // Definition of the resulting vector + vector<Part> parts; + + // make a table of all valid sets of parts + + char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'}; + int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; + arma::vec valid_sets = arma::ones<arma::vec>(bestval.n_rows); + + if (!bestval.is_empty()) + { + + // In questo punto viene introdotto un errore alla 3 cifra decimale + + arma::colvec t = arma::zeros<arma::colvec>(bestval.n_rows); + for (int i=0; i < (int)bestval.n_rows; ++i) + { + t(i) = bestval(i,1)*2; + } + + double m = t.max(); + + bestval(span::all,1) = bestval(span::all,1) / m; + bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1); + + arma::mat bestval2; + for (int i=0; i < (int)bestval.n_cols; ++i) + if (i!=1) + bestval2 = join_rows(bestval2,bestval.col(i)); + + for (int kSeg=0; kSeg<6; ++kSeg) + { + arma::mat currbestvals = arma::zeros<arma::mat>(bestval2.n_rows, bestval2.n_cols); + for (int i=0; i < (int)bestval2.n_rows; ++i) + for (int j=0; j < (int)bestval2.n_cols; ++j) + if (valid_sets(i)) + currbestvals(i,j) = bestval2(i,j); + + arma::vec t1 = currbestvals.col(0); + double ma; + uword maIdx; + ma = t1.max(maIdx); + + if ((maIdx == 0)&&(ma == 0)) + break; + + int bestLength = lrint(partlengths(currbestvals(maIdx,1))); + arma::rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2),span::all); + + arma::rowvec bestIndicesMap = arma::zeros<arma::rowvec>(bestIndices.size()); + for (int i=0; i < (int)bestIndices.size(); ++i) + if (bestIndices(i)>0) + bestIndicesMap(i) = 1; + + arma::rowvec mask = arma::zeros<arma::rowvec>(bestLength*2-1); + for (int i=0; i<bestLength; ++i) + mask(i+bestLength-1) = 1; + + arma::rowvec t2 = arma::conv(bestIndicesMap,mask); + arma::rowvec island = t2.subvec(mask.size()/2,t2.size()-1-mask.size()/2); + + // Save results in the structure + Part newPart; + newPart.n = bestLength; + uvec q1 = find(bestIndices > 0); + + for (int i=0; i < (int)q1.size();++i) + newPart.indices.push_back(q1(i)); + + newPart.letter = partletters[kSeg]; + newPart.value = partvalues[kSeg]; + newPart.level = kSeg+1; + parts.push_back(newPart); + + uvec q2 = find(valid_sets==1); + + for (int i=0; i < (int)q2.size(); ++i) + { + int iSet = q2(i); + int s = partlengths(bestval2(iSet,1)); + + arma::rowvec mask1 = arma::zeros<arma::rowvec>(s*2-1); + for (int i=0; i<s; ++i) + mask1(i+s-1) = 1; + + arma::rowvec Ind = decisionArray2.slice(bestval2(iSet,1))(bestval2(iSet,2),span::all); + arma::rowvec IndMap = arma::zeros<arma::rowvec>(Ind.size()); + for (int i=0; i < (int)Ind.size(); ++i) + if (Ind(i)>0) + IndMap(i) = 2; + + arma::rowvec t3 = arma::conv(IndMap,mask1); + arma::rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2); + arma::rowvec islandsdMult = currislands%island; + + arma::uvec islandsIndex = find(islandsdMult > 0); + + if (islandsIndex.size() > 0) + valid_sets(iSet) = 0; + } + } + } + else + { + Part newPart; + newPart.n = nBeat; + newPart.indices.push_back(0); + newPart.letter = 'A'; + newPart.value = 1; + newPart.level = 1; + parts.push_back(newPart); + } + + arma::vec bar = linspace(1,nBeat,nBeat); + Part np = nullpart(parts,bar); + + parts.push_back(np); + + // -------------- NOT CONVERTED ------------------------------------- + // if param.seg.editor + // [pa, ta] = partarray(parts); + // parts = editorssearch(pa, ta, parts); + // parts = [parts, nullpart(parts,1:nBeat)]; + // end + // ------------------------------------------------------------------ + + + mergenulls(parts); + + + // -------------- NOT CONVERTED ------------------------------------- + // if param.seg.editor + // [pa, ta] = partarray(parts); + // parts = editorssearch(pa, ta, parts); + // parts = [parts, nullpart(parts,1:nBeat)]; + // end + // ------------------------------------------------------------------ + + return parts; +} + + + +void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector<Part> &parts) +{ + // Collect Info + int nBeat = quantisedChromagram.size(); // Number of feature vector + int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector + + arma::mat synchTreble = arma::zeros<mat>(nBeat,nFeatValues/2); + + for (int i = 0; i < nBeat; ++ i) + for (int j = 0; j < nFeatValues/2; ++ j) + { + synchTreble(i,j) = quantisedChromagram[i].values[j]; + } + + arma::mat synchBass = arma::zeros<mat>(nBeat,nFeatValues/2); + + for (int i = 0; i < nBeat; ++ i) + for (int j = 0; j < nFeatValues/2; ++ j) + { + synchBass(i,j) = quantisedChromagram[i].values[j+12]; + } + + // Process + + arma::mat segTreble = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2); + arma::mat segBass = arma::zeros<arma::mat>(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2); + + for (int iPart=0; iPart < (int)parts.size(); ++iPart) + { + parts[iPart].nInd = parts[iPart].indices.size(); + + for (int kOccur=0; kOccur<parts[iPart].nInd; ++kOccur) + { + int kStartIndex = parts[iPart].indices[kOccur]; + int kEndIndex = kStartIndex + parts[iPart].n-1; + + segTreble.rows(kStartIndex,kEndIndex) = segTreble.rows(kStartIndex,kEndIndex) + synchTreble.rows(kStartIndex,kEndIndex); + segBass.rows(kStartIndex,kEndIndex) = segBass.rows(kStartIndex,kEndIndex) + synchBass.rows(kStartIndex,kEndIndex); + } + } +} + + +// Segment Integration +vector<Part> songSegmentIntegration(vector<Part> &parts) +{ + // Break up parts (every part will have one instance) + vector<Part> newPartVector; + vector<int> partindices; + + for (int iPart=0; iPart < (int)parts.size(); ++iPart) + { + parts[iPart].nInd = parts[iPart].indices.size(); + for (int iInstance=0; iInstance<parts[iPart].nInd; ++iInstance) + { + Part newPart; + newPart.n = parts[iPart].n; + newPart.letter = parts[iPart].letter; + newPart.value = parts[iPart].value; + newPart.level = parts[iPart].level; + newPart.indices.push_back(parts[iPart].indices[iInstance]); + newPart.nInd = 1; + partindices.push_back(parts[iPart].indices[iInstance]); + + newPartVector.push_back(newPart); + } + } + + + // Sort the parts in order of occurrence + sort (partindices.begin(), partindices.end()); + + for (int i=0; i < (int)partindices.size(); ++i) + { + bool found = false; + int in=0; + while (!found) + { + if (newPartVector[in].indices[0] == partindices[i]) + { + newPartVector.push_back(newPartVector[in]); + newPartVector.erase(newPartVector.begin()+in); + found = true; + } + else + in++; + } + } + + // Clear the vector + for (int iNewpart=1; iNewpart < (int)newPartVector.size(); ++iNewpart) + { + if (newPartVector[iNewpart].n < 12) + { + newPartVector[iNewpart-1].n = newPartVector[iNewpart-1].n + newPartVector[iNewpart].n; + newPartVector.erase(newPartVector.begin()+iNewpart); + } + } + + return newPartVector; +} + +// Segmenter +Vamp::Plugin::FeatureList Segmentino::runSegmenter(Vamp::Plugin::FeatureList quantisedChromagram) +{ + /* --- Display Information --- */ +// int numBeat = quantisedChromagram.size(); +// int numFeats = quantisedChromagram[0].values.size(); + + vector<Part> parts; + vector<Part> finalParts; + + parts = songSegment(quantisedChromagram); + songSegmentChroma(quantisedChromagram,parts); + + finalParts = songSegmentIntegration(parts); + + + // TEMP ---- + /*for (int i=0;i<finalParts.size(); ++i) + { + std::cout << "Parts n° " << i << std::endl; + std::cout << "n°: " << finalParts[i].n << std::endl; + std::cout << "letter: " << finalParts[i].letter << std::endl; + + std::cout << "indices: "; + for (int j=0;j<finalParts[i].indices.size(); ++j) + std::cout << finalParts[i].indices[j] << " "; + + std::cout << std::endl; + std::cout << "level: " << finalParts[i].level << std::endl; + }*/ + + // --------- + + + // Output + + Vamp::Plugin::FeatureList results; + + + Feature seg; + + arma::vec indices; +// int idx=0; + vector<int> values; + vector<string> letters; + + for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart) + { + int iInstance=0; + seg.hasTimestamp = true; + + int ind = finalParts[iPart].indices[iInstance]; + int ind1 = finalParts[iPart+1].indices[iInstance]; + + seg.timestamp = quantisedChromagram[ind].timestamp; + seg.hasDuration = true; + seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp; + seg.values.clear(); + seg.values.push_back(finalParts[iPart].value); + seg.label = finalParts[iPart].letter; + + results.push_back(seg); + } + + if (finalParts.size() > 0) { + int ind = finalParts[finalParts.size()-1].indices[0]; + seg.hasTimestamp = true; + seg.timestamp = quantisedChromagram[ind].timestamp; + seg.hasDuration = true; + seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp; + seg.values.clear(); + seg.values.push_back(finalParts[finalParts.size()-1].value); + seg.label = finalParts[finalParts.size()-1].letter; + + results.push_back(seg); + } + + return results; +} + + + + + + + + + + + + + + + + +