max@1: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ max@1: max@1: /* Chris@48: Segmentino max@1: Chris@48: Code by Massimiliano Zanoni and Matthias Mauch Chris@48: Centre for Digital Music, Queen Mary, University of London Chris@48: Chris@48: Copyright 2009-2013 Queen Mary, University of London. max@1: Chris@65: This program is free software: you can redistribute it and/or Chris@65: modify it under the terms of the GNU Affero General Public License Chris@65: as published by the Free Software Foundation, either version 3 of Chris@65: the License, or (at your option) any later version. See the file max@1: COPYING included with this distribution for more information. max@1: */ max@1: Chris@48: #include "Segmentino.h" max@1: Chris@49: #include Chris@49: #include Chris@49: #include Chris@49: #include Chris@49: #include Chris@49: #include Chris@49: #include Chris@49: Chris@49: #include Chris@49: Chris@49: #include Chris@49: max@1: #include max@1: #include max@1: #include max@1: #include max@1: max@1: #include max@1: Chris@56: using arma::colvec; Chris@56: using arma::conv; Chris@56: using arma::cor; Chris@56: using arma::cube; Chris@56: using arma::eye; Chris@56: using arma::imat; Chris@56: using arma::irowvec; Chris@56: using arma::linspace; Chris@56: using arma::mat; Chris@56: using arma::max; Chris@56: using arma::ones; Chris@56: using arma::rowvec; Chris@56: using arma::sort; Chris@56: using arma::span; Chris@56: using arma::sum; Chris@56: using arma::trans; Chris@56: using arma::uvec; Chris@56: using arma::uword; Chris@56: using arma::vec; Chris@56: using arma::zeros; Chris@56: max@1: using std::string; max@1: using std::vector; max@1: using std::cerr; max@1: using std::cout; max@1: using std::endl; max@1: max@1: // Result Struct max@1: typedef struct Part { max@1: int n; Chris@21: vector indices; max@1: string letter; Chris@21: int value; max@1: int level; max@1: int nInd; max@1: }Part; max@1: max@1: max@8: max@1: /* ------------------------------------ */ max@1: /* ----- BEAT DETECTOR CLASS ---------- */ max@1: /* ------------------------------------ */ max@1: max@1: class BeatTrackerData max@1: { max@1: /* --- ATTRIBUTES --- */ max@1: public: max@1: DFConfig dfConfig; max@1: DetectionFunction *df; max@1: DownBeat *downBeat; max@1: vector dfOutput; max@1: Vamp::RealTime origin; max@1: max@1: max@1: /* --- METHODS --- */ max@1: max@1: /* --- Constructor --- */ max@1: public: max@1: BeatTrackerData(float rate, const DFConfig &config) : dfConfig(config) { Chris@22: max@1: df = new DetectionFunction(config); max@1: // decimation factor aims at resampling to c. 3KHz; must be power of 2 max@1: int factor = MathUtilities::nextPowerOfTwo(rate / 3000); max@1: // std::cerr << "BeatTrackerData: factor = " << factor << std::endl; max@1: downBeat = new DownBeat(rate, factor, config.stepSize); max@1: } max@1: max@1: /* --- Desctructor --- */ max@1: ~BeatTrackerData() { Chris@22: delete df; max@1: delete downBeat; max@1: } max@1: max@1: void reset() { max@1: delete df; max@1: df = new DetectionFunction(dfConfig); max@1: dfOutput.clear(); max@1: downBeat->resetAudioBuffer(); max@1: origin = Vamp::RealTime::zeroTime; max@1: } max@1: }; max@1: max@1: max@1: /* --------------------------------------- */ max@1: /* ----- CHROMA EXTRACTOR CLASS ---------- */ max@1: /* --------------------------------------- */ max@1: max@1: class ChromaData max@1: { max@1: max@1: /* --- ATTRIBUTES --- */ max@1: max@1: public: max@1: int frameCount; max@1: int nBPS; max@1: Vamp::Plugin::FeatureList logSpectrum; Chris@37: int blockSize; max@1: int lengthOfNoteIndex; max@1: vector meanTunings; max@1: vector localTunings; max@1: float whitening; max@1: float preset; max@1: float useNNLS; max@1: vector localTuning; max@1: vector kernelValue; max@1: vector kernelFftIndex; max@1: vector kernelNoteIndex; max@1: float *dict; max@1: bool tuneLocal; max@1: float doNormalizeChroma; max@1: float rollon; max@1: float s; max@1: vector hw; max@1: vector sinvalues; max@1: vector cosvalues; max@1: Window window; max@1: FFTReal fft; Chris@37: int inputSampleRate; max@1: max@1: /* --- METHODS --- */ max@1: max@1: /* --- Constructor --- */ max@1: max@1: public: max@1: ChromaData(float inputSampleRate, size_t block_size) : max@1: frameCount(0), max@1: nBPS(3), max@1: logSpectrum(0), max@1: blockSize(0), max@1: lengthOfNoteIndex(0), max@1: meanTunings(0), max@1: localTunings(0), max@1: whitening(1.0), max@1: preset(0.0), max@1: useNNLS(1.0), max@1: localTuning(0.0), max@1: kernelValue(0), max@1: kernelFftIndex(0), max@1: kernelNoteIndex(0), max@1: dict(0), max@1: tuneLocal(0.0), max@1: doNormalizeChroma(0), max@1: rollon(0.0), Chris@35: s(0.7), Chris@35: sinvalues(0), Chris@35: cosvalues(0), Chris@35: window(HanningWindow, block_size), Chris@35: fft(block_size), Chris@35: inputSampleRate(inputSampleRate) max@1: { max@1: // make the *note* dictionary matrix max@1: dict = new float[nNote * 84]; max@1: for (int i = 0; i < nNote * 84; ++i) dict[i] = 0.0; max@1: blockSize = block_size; max@1: } max@1: max@1: max@1: /* --- Desctructor --- */ max@1: max@1: ~ChromaData() { max@1: delete [] dict; max@1: } max@1: max@1: /* --- Public Methods --- */ max@1: max@1: void reset() { max@1: frameCount = 0; max@1: logSpectrum.clear(); max@1: for (int iBPS = 0; iBPS < 3; ++iBPS) { max@1: meanTunings[iBPS] = 0; max@1: localTunings[iBPS] = 0; max@1: } max@1: localTuning.clear(); max@1: } max@1: max@1: void baseProcess(float *inputBuffers, Vamp::RealTime timestamp) max@1: { Chris@22: max@1: frameCount++; max@1: float *magnitude = new float[blockSize/2]; max@1: double *fftReal = new double[blockSize]; max@1: double *fftImag = new double[blockSize]; max@1: max@1: // FFTReal wants doubles, so we need to make a local copy of inputBuffers max@1: double *inputBuffersDouble = new double[blockSize]; Chris@37: for (int i = 0; i < blockSize; i++) inputBuffersDouble[i] = inputBuffers[i]; max@1: Chris@65: fft.forward(inputBuffersDouble, fftReal, fftImag); max@1: max@1: float energysum = 0; max@1: // make magnitude max@1: float maxmag = -10000; max@1: for (int iBin = 0; iBin < static_cast(blockSize/2); iBin++) { max@1: magnitude[iBin] = sqrt(fftReal[iBin] * fftReal[iBin] + max@1: fftImag[iBin] * fftImag[iBin]); max@1: if (magnitude[iBin]>blockSize*1.0) magnitude[iBin] = blockSize; max@1: // a valid audio signal (between -1 and 1) should not be limited here. max@1: if (maxmag < magnitude[iBin]) maxmag = magnitude[iBin]; max@1: if (rollon > 0) { max@1: energysum += pow(magnitude[iBin],2); max@1: } max@1: } max@1: max@1: float cumenergy = 0; max@1: if (rollon > 0) { max@1: for (int iBin = 2; iBin < static_cast(blockSize/2); iBin++) { max@1: cumenergy += pow(magnitude[iBin],2); max@1: if (cumenergy < energysum * rollon / 100) magnitude[iBin-2] = 0; max@1: else break; max@1: } max@1: } max@1: max@1: if (maxmag < 2) { max@1: // cerr << "timestamp " << timestamp << ": very low magnitude, setting magnitude to all zeros" << endl; max@1: for (int iBin = 0; iBin < static_cast(blockSize/2); iBin++) { max@1: magnitude[iBin] = 0; max@1: } max@1: } max@1: max@1: // cerr << magnitude[200] << endl; max@1: max@1: // note magnitude mapping using pre-calculated matrix max@1: float *nm = new float[nNote]; // note magnitude max@1: for (int iNote = 0; iNote < nNote; iNote++) { max@1: nm[iNote] = 0; // initialise as 0 max@1: } max@1: int binCount = 0; max@1: for (vector::iterator it = kernelValue.begin(); it != kernelValue.end(); ++it) { max@1: nm[kernelNoteIndex[binCount]] += magnitude[kernelFftIndex[binCount]] * kernelValue[binCount]; max@1: binCount++; max@1: } max@1: max@1: float one_over_N = 1.0/frameCount; max@1: // update means of complex tuning variables max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] *= float(frameCount-1)*one_over_N; max@1: max@1: for (int iTone = 0; iTone < round(nNote*0.62/nBPS)*nBPS+1; iTone = iTone + nBPS) { max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) meanTunings[iBPS] += nm[iTone + iBPS]*one_over_N; max@1: float ratioOld = 0.997; max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: localTunings[iBPS] *= ratioOld; max@1: localTunings[iBPS] += nm[iTone + iBPS] * (1 - ratioOld); max@1: } max@1: } max@1: max@1: float localTuningImag = 0; max@1: float localTuningReal = 0; max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: localTuningReal += localTunings[iBPS] * cosvalues[iBPS]; max@1: localTuningImag += localTunings[iBPS] * sinvalues[iBPS]; max@1: } max@1: max@1: float normalisedtuning = atan2(localTuningImag, localTuningReal)/(2*M_PI); max@1: localTuning.push_back(normalisedtuning); max@1: max@1: Vamp::Plugin::Feature f1; // logfreqspec max@1: f1.hasTimestamp = true; max@1: f1.timestamp = timestamp; max@1: for (int iNote = 0; iNote < nNote; iNote++) { max@1: f1.values.push_back(nm[iNote]); max@1: } max@1: max@1: // deletes max@1: delete[] inputBuffersDouble; max@1: delete[] magnitude; max@1: delete[] fftReal; max@1: delete[] fftImag; max@1: delete[] nm; max@1: max@1: logSpectrum.push_back(f1); // remember note magnitude max@1: } max@1: max@1: bool initialise() max@1: { max@1: dictionaryMatrix(dict, s); Chris@22: Chris@37: // make things for tuning estimation Chris@37: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: sinvalues.push_back(sin(2*M_PI*(iBPS*1.0/nBPS))); max@1: cosvalues.push_back(cos(2*M_PI*(iBPS*1.0/nBPS))); max@1: } max@1: Chris@22: Chris@37: // make hamming window of length 1/2 octave Chris@37: int hamwinlength = nBPS * 6 + 1; max@1: float hamwinsum = 0; max@1: for (int i = 0; i < hamwinlength; ++i) { max@1: hw.push_back(0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1))); max@1: hamwinsum += 0.54 - 0.46 * cos((2*M_PI*i)/(hamwinlength-1)); max@1: } max@1: for (int i = 0; i < hamwinlength; ++i) hw[i] = hw[i] / hamwinsum; max@1: max@1: max@1: // initialise the tuning max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: meanTunings.push_back(0); max@1: localTunings.push_back(0); max@1: } Chris@22: max@1: frameCount = 0; max@1: int tempn = nNote * blockSize/2; max@1: // cerr << "length of tempkernel : " << tempn << endl; max@1: float *tempkernel; max@1: max@1: tempkernel = new float[tempn]; max@1: max@1: logFreqMatrix(inputSampleRate, blockSize, tempkernel); max@1: kernelValue.clear(); max@1: kernelFftIndex.clear(); max@1: kernelNoteIndex.clear(); max@1: int countNonzero = 0; max@1: for (int iNote = 0; iNote < nNote; ++iNote) { max@1: // I don't know if this is wise: manually making a sparse matrix max@1: for (int iFFT = 0; iFFT < static_cast(blockSize/2); ++iFFT) { max@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { max@1: kernelValue.push_back(tempkernel[iFFT + blockSize/2 * iNote]); max@1: if (tempkernel[iFFT + blockSize/2 * iNote] > 0) { max@1: countNonzero++; max@1: } max@1: kernelFftIndex.push_back(iFFT); Chris@23: kernelNoteIndex.push_back(iNote); max@1: } max@1: } max@1: } max@1: delete [] tempkernel; Chris@37: Chris@37: return true; max@1: } max@1: }; max@1: max@1: max@1: /* --------------------------------- */ max@1: /* ----- SONG PARTITIONER ---------- */ max@1: /* --------------------------------- */ max@1: max@1: max@1: /* --- ATTRIBUTES --- */ max@1: Chris@48: float Segmentino::m_stepSecs = 0.01161; // 512 samples at 44100 Chris@48: int Segmentino::m_chromaFramesizeFactor = 16; // 16 times as long as beat tracker's Chris@48: int Segmentino::m_chromaStepsizeFactor = 4; // 4 times as long as beat tracker's max@1: max@1: max@1: /* --- METHODS --- */ max@1: max@1: /* --- Constructor --- */ Chris@48: Segmentino::Segmentino(float inputSampleRate) : max@1: Vamp::Plugin(inputSampleRate), max@1: m_d(0), Chris@35: m_chromadata(0), max@1: m_bpb(4), max@1: m_pluginFrameCount(0) max@1: { max@1: } max@1: max@1: max@1: /* --- Desctructor --- */ Chris@48: Segmentino::~Segmentino() max@1: { max@1: delete m_d; Chris@35: delete m_chromadata; max@1: } max@1: max@1: max@1: /* --- Methods --- */ Chris@48: string Segmentino::getIdentifier() const max@1: { Chris@54: return "segmentino"; max@1: } max@1: Chris@48: string Segmentino::getName() const max@1: { Chris@54: return "Segmentino"; max@1: } max@1: Chris@48: string Segmentino::getDescription() const max@1: { max@1: return "Estimate contiguous segments pertaining to song parts such as verse and chorus."; max@1: } max@1: Chris@48: string Segmentino::getMaker() const max@1: { max@1: return "Queen Mary, University of London"; max@1: } max@1: Chris@48: int Segmentino::getPluginVersion() const max@1: { Chris@81: return 3; max@1: } max@1: Chris@48: string Segmentino::getCopyright() const max@1: { Chris@81: return "Plugin by Matthew Davies, Christian Landone, Chris Cannam, Matthias Mauch and Massimiliano Zanoni Copyright (c) 2006-2019 QMUL - Affero GPL"; max@1: } max@1: Chris@48: Segmentino::ParameterList Segmentino::getParameterDescriptors() const max@1: { max@1: ParameterList list; max@1: max@1: ParameterDescriptor desc; max@1: matthiasm@46: // desc.identifier = "bpb"; matthiasm@46: // desc.name = "Beats per Bar"; matthiasm@46: // desc.description = "The number of beats in each bar"; matthiasm@46: // desc.minValue = 2; matthiasm@46: // desc.maxValue = 16; matthiasm@46: // desc.defaultValue = 4; matthiasm@46: // desc.isQuantized = true; matthiasm@46: // desc.quantizeStep = 1; matthiasm@46: // list.push_back(desc); max@1: max@1: return list; max@1: } max@1: Chris@48: float Segmentino::getParameter(std::string name) const max@1: { max@1: if (name == "bpb") return m_bpb; max@1: return 0.0; max@1: } max@1: Chris@48: void Segmentino::setParameter(std::string name, float value) max@1: { max@1: if (name == "bpb") m_bpb = lrintf(value); max@1: } max@1: max@1: max@1: // Return the StepSize for Chroma Extractor Chris@48: size_t Segmentino::getPreferredStepSize() const max@1: { max@1: size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001); max@1: if (step < 1) step = 1; max@1: max@1: return step; max@1: } max@1: max@1: // Return the BlockSize for Chroma Extractor Chris@48: size_t Segmentino::getPreferredBlockSize() const max@1: { Chris@50: int theoretical = getPreferredStepSize() * 2; max@1: theoretical *= m_chromaFramesizeFactor; Chris@50: return MathUtilities::nextPowerOfTwo(theoretical); max@1: } max@1: max@1: max@1: // Initialize the plugin and define Beat Tracker and Chroma Extractor Objects Chris@48: bool Segmentino::initialise(size_t channels, size_t stepSize, size_t blockSize) max@1: { max@1: if (m_d) { Chris@22: delete m_d; Chris@22: m_d = 0; max@1: } Chris@35: if (m_chromadata) { Chris@35: delete m_chromadata; Chris@35: m_chromadata = 0; Chris@35: } max@1: max@1: if (channels < getMinChannelCount() || Chris@22: channels > getMaxChannelCount()) { Chris@48: std::cerr << "Segmentino::initialise: Unsupported channel count: " max@1: << channels << std::endl; max@1: return false; max@1: } max@1: max@1: if (stepSize != getPreferredStepSize()) { Chris@48: std::cerr << "ERROR: Segmentino::initialise: Unsupported step size for this sample rate: " max@1: << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl; max@1: return false; max@1: } max@1: max@1: if (blockSize != getPreferredBlockSize()) { Chris@48: std::cerr << "WARNING: Segmentino::initialise: Sub-optimal block size for this sample rate: " max@1: << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl; max@1: } max@1: max@1: // Beat tracker and Chroma extractor has two different configuration parameters max@1: max@1: // Configuration Parameters for Beat Tracker max@1: DFConfig dfConfig; max@1: dfConfig.DFType = DF_COMPLEXSD; max@1: dfConfig.stepSize = stepSize; max@1: dfConfig.frameLength = blockSize / m_chromaFramesizeFactor; max@1: dfConfig.dbRise = 3; max@1: dfConfig.adaptiveWhitening = false; max@1: dfConfig.whiteningRelaxCoeff = -1; max@1: dfConfig.whiteningFloor = -1; max@1: max@1: // Initialise Beat Tracker max@1: m_d = new BeatTrackerData(m_inputSampleRate, dfConfig); max@1: m_d->downBeat->setBeatsPerBar(m_bpb); max@1: max@1: // Initialise Chroma Extractor max@1: m_chromadata = new ChromaData(m_inputSampleRate, blockSize); max@1: m_chromadata->initialise(); max@1: matthiasm@59: // definition of outputs numbers used internally matthiasm@59: int outputCounter = 1; matthiasm@59: m_beatOutputNumber = outputCounter++; matthiasm@59: m_barsOutputNumber = outputCounter++; matthiasm@59: m_beatcountsOutputNumber = outputCounter++; matthiasm@59: m_beatsdOutputNumber = outputCounter++; matthiasm@59: m_logscalespecOutputNumber = outputCounter++; matthiasm@59: m_bothchromaOutputNumber = outputCounter++; matthiasm@59: m_qchromafwOutputNumber = outputCounter++; matthiasm@59: m_qchromaOutputNumber = outputCounter++; matthiasm@59: max@1: return true; max@1: } max@1: Chris@48: void Segmentino::reset() max@1: { max@1: if (m_d) m_d->reset(); Chris@38: if (m_chromadata) m_chromadata->reset(); max@1: m_pluginFrameCount = 0; max@1: } max@1: Chris@48: Segmentino::OutputList Segmentino::getOutputDescriptors() const max@1: { matthiasm@59: max@1: OutputList list; max@1: max@1: OutputDescriptor segm; Chris@15: segm.identifier = "segmentation"; max@1: segm.name = "Segmentation"; max@1: segm.description = "Segmentation"; max@1: segm.unit = "segment-type"; max@1: segm.hasFixedBinCount = true; max@1: //segm.binCount = 24; max@1: segm.binCount = 1; max@1: segm.hasKnownExtents = true; max@1: segm.minValue = 1; max@1: segm.maxValue = 5; max@1: segm.isQuantized = true; max@1: segm.quantizeStep = 1; max@1: segm.sampleType = OutputDescriptor::VariableSampleRate; Chris@17: segm.sampleRate = 1.0 / m_stepSecs; max@1: segm.hasDuration = true; matthiasm@59: m_segmOutputNumber = 0; matthiasm@59: max@1: list.push_back(segm); max@1: max@1: return list; max@1: } max@1: max@1: // Executed for each frame - called from the host max@1: max@1: // We use time domain input, because DownBeat requires it -- so we max@1: // use the time-domain version of DetectionFunction::process which max@1: // does its own FFT. It requires doubles as input, so we need to max@1: // make a temporary copy max@1: max@1: // We only support a single input channel Chris@65: Segmentino::FeatureSet Segmentino::process(const float *const *inputBuffers, Chris@65: Vamp::RealTime timestamp) max@1: { max@1: if (!m_d) { Chris@48: cerr << "ERROR: Segmentino::process: " Chris@48: << "Segmentino has not been initialised" Chris@22: << endl; Chris@22: return FeatureSet(); max@1: } max@1: max@1: const int fl = m_d->dfConfig.frameLength; Chris@67: max@1: int sampleOffset = ((m_chromaFramesizeFactor-1) * fl) / 2; max@1: Chris@67: double *dfinput = new double[fl]; Chris@67: max@1: // Since chroma needs a much longer frame size, we only ever use the very max@1: // beginning of the frame for beat tracking. max@1: for (int i = 0; i < fl; ++i) dfinput[i] = inputBuffers[0][i]; Chris@65: double output = m_d->df->processTimeDomain(dfinput); max@1: Chris@67: delete[] dfinput; Chris@67: max@1: if (m_d->dfOutput.empty()) m_d->origin = timestamp; max@1: max@1: // std::cerr << "df[" << m_d->dfOutput.size() << "] is " << output << std::endl; max@1: m_d->dfOutput.push_back(output); max@1: max@1: // Downsample and store the incoming audio block. max@1: // We have an overlap on the incoming audio stream (step size is max@1: // half block size) -- this function is configured to take only a max@1: // step size's worth, so effectively ignoring the overlap. Note max@1: // however that this means we omit the last blocksize - stepsize max@1: // samples completely for the purposes of barline detection max@1: // (hopefully not a problem) max@1: m_d->downBeat->pushAudioBlock(inputBuffers[0]); max@1: max@1: // The following is not done every time, but only every m_chromaFramesizeFactor times, max@1: // because the chroma does not need dense time frames. max@1: max@1: if (m_pluginFrameCount % m_chromaStepsizeFactor == 0) max@1: { max@1: max@1: // Window the full time domain, data, FFT it and process chroma stuff. max@1: Chris@67: float *windowedBuffers = new float[m_chromadata->blockSize]; Chris@67: max@1: m_chromadata->window.cut(&inputBuffers[0][0], &windowedBuffers[0]); max@1: max@1: // adjust timestamp (we want the middle of the frame) Chris@67: timestamp = timestamp + Chris@67: Vamp::RealTime::frame2RealTime(sampleOffset, lrintf(m_inputSampleRate)); max@1: max@1: m_chromadata->baseProcess(&windowedBuffers[0], timestamp); Chris@67: Chris@67: delete[] windowedBuffers; max@1: } Chris@67: max@1: m_pluginFrameCount++; max@1: max@1: FeatureSet fs; max@1: return fs; max@1: } max@1: Chris@48: Segmentino::FeatureSet Segmentino::getRemainingFeatures() max@1: { max@1: if (!m_d) { Chris@48: cerr << "ERROR: Segmentino::getRemainingFeatures: " Chris@48: << "Segmentino has not been initialised" Chris@22: << endl; Chris@22: return FeatureSet(); max@1: } max@1: matthiasm@59: FeatureSet masterFeatureset; matthiasm@59: FeatureSet internalFeatureset = beatTrack(); matthiasm@59: matthiasm@59: int beatcount = internalFeatureset[m_beatOutputNumber].size(); Chris@49: if (beatcount == 0) return Segmentino::FeatureSet(); matthiasm@59: Vamp::RealTime last_beattime = internalFeatureset[m_beatOutputNumber][beatcount-1].timestamp; matthiasm@59: matthiasm@60: // // THIS FOLLOWING BIT IS WEIRD! REPLACES BEAT-TRACKED BEATS WITH matthiasm@60: // // UNIFORM 0.5 SEC BEATS matthiasm@59: // internalFeatureset[m_beatOutputNumber].clear(); matthiasm@59: // Vamp::RealTime beattime = Vamp::RealTime::fromSeconds(1.0); matthiasm@59: // while (beattime < last_beattime) matthiasm@59: // { matthiasm@59: // Feature beatfeature; matthiasm@59: // beatfeature.hasTimestamp = true; matthiasm@59: // beatfeature.timestamp = beattime; matthiasm@59: // masterFeatureset[m_beatOutputNumber].push_back(beatfeature); matthiasm@59: // beattime = beattime + Vamp::RealTime::fromSeconds(0.5); matthiasm@59: // } matthiasm@46: Chris@16: FeatureList chromaList = chromaFeatures(); max@1: Chris@37: for (int i = 0; i < (int)chromaList.size(); ++i) max@1: { matthiasm@59: internalFeatureset[m_bothchromaOutputNumber].push_back(chromaList[i]); max@1: } max@1: max@1: // quantised and pseudo-quantised (beat-wise) chroma matthiasm@59: std::vector quantisedChroma = beatQuantiser(chromaList, internalFeatureset[m_beatOutputNumber]); Chris@32: Chris@32: if (quantisedChroma.empty()) return masterFeatureset; max@1: matthiasm@59: internalFeatureset[m_qchromafwOutputNumber] = quantisedChroma[0]; matthiasm@59: internalFeatureset[m_qchromaOutputNumber] = quantisedChroma[1]; max@1: max@1: // Segmentation Chris@39: try { Chris@39: masterFeatureset[m_segmOutputNumber] = runSegmenter(quantisedChroma[1]); Chris@39: } catch (std::bad_alloc &a) { Chris@48: cerr << "ERROR: Segmentino::getRemainingFeatures: Failed to run segmenter, not enough memory (song too long?)" << endl; Chris@39: } max@1: max@1: return(masterFeatureset); max@1: } max@1: max@1: /* ------ Beat Tracker ------ */ max@1: Chris@48: Segmentino::FeatureSet Segmentino::beatTrack() max@1: { max@1: vector df; max@1: vector beatPeriod; max@1: vector tempi; max@1: Chris@37: for (int i = 2; i < (int)m_d->dfOutput.size(); ++i) { // discard first two elts max@1: df.push_back(m_d->dfOutput[i]); max@1: beatPeriod.push_back(0.0); max@1: } max@1: if (df.empty()) return FeatureSet(); max@1: max@1: TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize); max@1: tt.calculateBeatPeriod(df, beatPeriod, tempi); max@1: max@1: vector beats; max@1: tt.calculateBeats(df, beatPeriod, beats); max@1: max@1: vector downbeats; max@1: size_t downLength = 0; max@1: const float *downsampled = m_d->downBeat->getBufferedAudio(downLength); max@1: m_d->downBeat->findDownBeats(downsampled, downLength, beats, downbeats); max@1: max@1: vector beatsd; max@1: m_d->downBeat->getBeatSD(beatsd); max@1: max@1: /*std::cout << "BeatTracker: found downbeats at: "; max@1: for (int i = 0; i < downbeats.size(); ++i) std::cout << downbeats[i] << " " << std::endl;*/ max@1: max@1: FeatureSet returnFeatures; max@1: max@1: char label[20]; max@1: max@1: int dbi = 0; max@1: int beat = 0; max@1: int bar = 0; max@1: max@1: if (!downbeats.empty()) { max@1: // get the right number for the first beat; this will be max@1: // incremented before use (at top of the following loop) max@1: int firstDown = downbeats[0]; max@1: beat = m_bpb - firstDown - 1; max@1: if (beat == m_bpb) beat = 0; max@1: } max@1: Chris@37: for (int i = 0; i < (int)beats.size(); ++i) { max@1: Chris@37: int frame = beats[i] * m_d->dfConfig.stepSize; max@1: Chris@37: if (dbi < (int)downbeats.size() && i == downbeats[dbi]) { max@1: beat = 0; max@1: ++bar; max@1: ++dbi; max@1: } else { max@1: ++beat; max@1: } max@1: max@1: /* Ooutput Section */ max@1: max@1: // outputs are: max@1: // max@1: // 0 -> beats max@1: // 1 -> bars max@1: // 2 -> beat counter function max@1: max@1: Feature feature; max@1: feature.hasTimestamp = true; max@1: feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime (frame, lrintf(m_inputSampleRate)); max@1: max@1: sprintf(label, "%d", beat + 1); max@1: feature.label = label; max@1: returnFeatures[m_beatOutputNumber].push_back(feature); // labelled beats max@1: max@1: feature.values.push_back(beat + 1); max@1: returnFeatures[m_beatcountsOutputNumber].push_back(feature); // beat function max@1: Chris@37: if (i > 0 && i <= (int)beatsd.size()) { max@1: feature.values.clear(); max@1: feature.values.push_back(beatsd[i-1]); max@1: feature.label = ""; max@1: returnFeatures[m_beatsdOutputNumber].push_back(feature); // beat spectral difference max@1: } max@1: max@1: if (beat == 0) { max@1: feature.values.clear(); max@1: sprintf(label, "%d", bar); max@1: feature.label = label; max@1: returnFeatures[m_barsOutputNumber].push_back(feature); // bars max@1: } max@1: } max@1: max@1: return returnFeatures; max@1: } max@1: max@1: max@1: /* ------ Chroma Extractor ------ */ max@1: Chris@48: Segmentino::FeatureList Segmentino::chromaFeatures() max@1: { max@1: max@1: FeatureList returnFeatureList; max@1: FeatureList tunedlogfreqspec; max@1: max@1: if (m_chromadata->logSpectrum.size() == 0) return returnFeatureList; max@1: max@1: /** Calculate Tuning max@1: calculate tuning from (using the angle of the complex number defined by the max@1: cumulative mean real and imag values) max@1: **/ max@1: float meanTuningImag = 0; max@1: float meanTuningReal = 0; max@1: for (int iBPS = 0; iBPS < nBPS; ++iBPS) { max@1: meanTuningReal += m_chromadata->meanTunings[iBPS] * m_chromadata->cosvalues[iBPS]; max@1: meanTuningImag += m_chromadata->meanTunings[iBPS] * m_chromadata->sinvalues[iBPS]; max@1: } max@1: float cumulativetuning = 440 * pow(2,atan2(meanTuningImag, meanTuningReal)/(24*M_PI)); max@1: float normalisedtuning = atan2(meanTuningImag, meanTuningReal)/(2*M_PI); max@1: int intShift = floor(normalisedtuning * 3); max@1: float floatShift = normalisedtuning * 3 - intShift; // floatShift is a really bad name for this max@1: max@1: char buffer0 [50]; max@1: max@1: sprintf(buffer0, "estimated tuning: %0.1f Hz", cumulativetuning); max@1: max@1: /** Tune Log-Frequency Spectrogram max@1: calculate a tuned log-frequency spectrogram (f2): use the tuning estimated above (kinda f0) to max@1: perform linear interpolation on the existing log-frequency spectrogram (kinda f1). max@1: **/ Chris@50: // cerr << endl << "[NNLS Chroma Plugin] Tuning Log-Frequency Spectrogram ... "; max@1: max@1: float tempValue = 0; max@1: max@1: int count = 0; max@1: max@1: for (FeatureList::iterator i = m_chromadata->logSpectrum.begin(); i != m_chromadata->logSpectrum.end(); ++i) max@1: { max@1: max@1: Feature f1 = *i; max@1: Feature f2; // tuned log-frequency spectrum max@1: max@1: f2.hasTimestamp = true; max@1: f2.timestamp = f1.timestamp; max@1: max@1: f2.values.push_back(0.0); max@1: f2.values.push_back(0.0); // set lower edge to zero max@1: max@1: if (m_chromadata->tuneLocal) { max@1: intShift = floor(m_chromadata->localTuning[count] * 3); max@1: floatShift = m_chromadata->localTuning[count] * 3 - intShift; max@1: // floatShift is a really bad name for this max@1: } max@1: max@1: for (int k = 2; k < (int)f1.values.size() - 3; ++k) max@1: { // interpolate all inner bins max@1: tempValue = f1.values[k + intShift] * (1-floatShift) + f1.values[k+intShift+1] * floatShift; max@1: f2.values.push_back(tempValue); max@1: } max@1: max@1: f2.values.push_back(0.0); max@1: f2.values.push_back(0.0); max@1: f2.values.push_back(0.0); // upper edge max@1: max@1: vector runningmean = SpecialConvolution(f2.values,m_chromadata->hw); max@1: vector runningstd; max@1: for (int i = 0; i < nNote; i++) { // first step: squared values into vector (variance) max@1: runningstd.push_back((f2.values[i] - runningmean[i]) * (f2.values[i] - runningmean[i])); max@1: } max@1: runningstd = SpecialConvolution(runningstd,m_chromadata->hw); // second step convolve max@1: for (int i = 0; i < nNote; i++) max@1: { max@1: max@1: runningstd[i] = sqrt(runningstd[i]); max@1: // square root to finally have running std max@1: max@1: if (runningstd[i] > 0) max@1: { max@1: f2.values[i] = (f2.values[i] - runningmean[i]) > 0 ? max@1: (f2.values[i] - runningmean[i]) / pow(runningstd[i],m_chromadata->whitening) : 0; max@1: } max@1: max@1: if (f2.values[i] < 0) { max@1: max@1: cerr << "ERROR: negative value in logfreq spectrum" << endl; max@1: max@1: } max@1: } max@1: tunedlogfreqspec.push_back(f2); max@1: count++; max@1: } Chris@50: // cerr << "done." << endl; max@1: /** Semitone spectrum and chromagrams max@1: Semitone-spaced log-frequency spectrum derived max@1: from the tuned log-freq spectrum above. the spectrum max@1: is inferred using a non-negative least squares algorithm. max@1: Three different kinds of chromagram are calculated, "treble", "bass", and "both" (which means max@1: bass and treble stacked onto each other). max@1: **/ Chris@50: /* max@1: if (m_chromadata->useNNLS == 0) { max@1: cerr << "[NNLS Chroma Plugin] Mapping to semitone spectrum and chroma ... "; max@1: } else { max@1: cerr << "[NNLS Chroma Plugin] Performing NNLS and mapping to chroma ... "; max@1: } Chris@50: */ max@1: vector oldchroma = vector(12,0); max@1: vector oldbasschroma = vector(12,0); max@1: count = 0; max@1: max@1: for (FeatureList::iterator it = tunedlogfreqspec.begin(); it != tunedlogfreqspec.end(); ++it) { max@1: Feature logfreqsp = *it; // logfreq spectrum max@1: Feature bothchroma; // treble and bass chromagram max@1: max@1: bothchroma.hasTimestamp = true; max@1: bothchroma.timestamp = logfreqsp.timestamp; max@1: max@1: float b[nNote]; max@1: max@1: bool some_b_greater_zero = false; max@1: float sumb = 0; max@1: for (int i = 0; i < nNote; i++) { max@1: b[i] = logfreqsp.values[i]; max@1: sumb += b[i]; max@1: if (b[i] > 0) { max@1: some_b_greater_zero = true; max@1: } max@1: } max@1: max@1: // here's where the non-negative least squares algorithm calculates the note activation x max@1: max@1: vector chroma = vector(12, 0); max@1: vector basschroma = vector(12, 0); max@1: float currval; max@1: int iSemitone = 0; max@1: max@1: if (some_b_greater_zero) { max@1: if (m_chromadata->useNNLS == 0) { max@1: for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { max@1: currval = 0; max@1: for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { max@1: currval += b[iNote + iBPS] * (1-abs(iBPS*1.0/(nBPS/2+1))); max@1: } max@1: chroma[iSemitone % 12] += currval * treblewindow[iSemitone]; max@1: basschroma[iSemitone % 12] += currval * basswindow[iSemitone]; max@1: iSemitone++; max@1: } max@1: max@1: } else { max@1: float x[84+1000]; max@1: for (int i = 1; i < 1084; ++i) x[i] = 1.0; max@1: vector signifIndex; max@1: int index=0; max@1: sumb /= 84.0; max@1: for (int iNote = nBPS/2 + 2; iNote < nNote - nBPS/2; iNote += nBPS) { max@1: float currval = 0; max@1: for (int iBPS = -nBPS/2; iBPS < nBPS/2+1; ++iBPS) { max@1: currval += b[iNote + iBPS]; max@1: } max@1: if (currval > 0) signifIndex.push_back(index); max@1: index++; max@1: } max@1: float rnorm; max@1: float w[84+1000]; max@1: float zz[84+1000]; max@1: int indx[84+1000]; max@1: int mode; max@1: int dictsize = nNote*signifIndex.size(); max@1: max@1: float *curr_dict = new float[dictsize]; max@1: for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { max@1: for (int iBin = 0; iBin < nNote; iBin++) { max@1: curr_dict[iNote * nNote + iBin] = max@1: 1.0 * m_chromadata->dict[signifIndex[iNote] * nNote + iBin]; max@1: } max@1: } max@1: nnls(curr_dict, nNote, nNote, signifIndex.size(), b, x, &rnorm, w, zz, indx, &mode); max@1: delete [] curr_dict; max@1: for (int iNote = 0; iNote < (int)signifIndex.size(); ++iNote) { max@1: // cerr << mode << endl; max@1: chroma[signifIndex[iNote] % 12] += x[iNote] * treblewindow[signifIndex[iNote]]; max@1: basschroma[signifIndex[iNote] % 12] += x[iNote] * basswindow[signifIndex[iNote]]; max@1: } max@1: } max@1: } max@1: max@1: chroma.insert(chroma.begin(), basschroma.begin(), basschroma.end()); max@1: // just stack the both chromas max@1: max@1: bothchroma.values = chroma; max@1: returnFeatureList.push_back(bothchroma); max@1: count++; max@1: } Chris@50: // cerr << "done." << endl; max@1: max@1: return returnFeatureList; max@1: } max@1: max@1: /* ------ Beat Quantizer ------ */ max@1: max@4: std::vector Chris@48: Segmentino::beatQuantiser(Vamp::Plugin::FeatureList chromagram, Vamp::Plugin::FeatureList beats) max@1: { max@1: std::vector returnVector; max@1: max@1: FeatureList fwQchromagram; // frame-wise beat-quantised chroma max@1: FeatureList bwQchromagram; // beat-wise beat-quantised chroma matthiasm@43: matthiasm@43: matthiasm@43: size_t nChromaFrame = chromagram.size(); matthiasm@43: size_t nBeat = beats.size(); max@1: max@1: if (nBeat == 0 && nChromaFrame == 0) return returnVector; max@1: Chris@37: int nBin = chromagram[0].values.size(); max@1: max@1: vector tempChroma = vector(nBin); max@1: max@1: Vamp::RealTime beatTimestamp = Vamp::RealTime::zeroTime; max@1: int currBeatCount = -1; // start before first beat max@1: int framesInBeat = 0; max@1: matthiasm@43: for (size_t iChroma = 0; iChroma < nChromaFrame; ++iChroma) max@1: { max@4: Vamp::RealTime frameTimestamp = chromagram[iChroma].timestamp; Chris@24: Vamp::RealTime newBeatTimestamp; Chris@22: Chris@37: if (currBeatCount != (int)beats.size() - 1) { Chris@37: newBeatTimestamp = beats[currBeatCount+1].timestamp; Chris@37: } else { Chris@37: newBeatTimestamp = chromagram[nChromaFrame-1].timestamp; Chris@37: } Chris@22: Chris@24: if (frameTimestamp > newBeatTimestamp || max@1: iChroma == nChromaFrame-1) max@1: { max@1: // new beat (or last chroma frame) max@1: // 1. finish all the old beat processing Chris@23: if (framesInBeat > 0) Chris@23: { Chris@23: for (int i = 0; i < nBin; ++i) tempChroma[i] /= framesInBeat; // average Chris@23: } max@1: max@1: Feature bwQchromaFrame; max@1: bwQchromaFrame.hasTimestamp = true; max@1: bwQchromaFrame.timestamp = beatTimestamp; max@1: bwQchromaFrame.values = tempChroma; Chris@24: bwQchromaFrame.duration = newBeatTimestamp - beatTimestamp; max@1: bwQchromagram.push_back(bwQchromaFrame); max@1: max@1: for (int iFrame = -framesInBeat; iFrame < 0; ++iFrame) max@1: { max@1: Feature fwQchromaFrame; max@1: fwQchromaFrame.hasTimestamp = true; max@1: fwQchromaFrame.timestamp = chromagram[iChroma+iFrame].timestamp; max@1: fwQchromaFrame.values = tempChroma; // all between two beats get the same max@1: fwQchromagram.push_back(fwQchromaFrame); max@1: } max@1: max@1: // 2. increments / resets for current (new) beat max@1: currBeatCount++; Chris@24: beatTimestamp = newBeatTimestamp; Chris@37: for (int i = 0; i < nBin; ++i) tempChroma[i] = 0; // average max@1: framesInBeat = 0; max@1: } max@1: framesInBeat++; Chris@37: for (int i = 0; i < nBin; ++i) tempChroma[i] += chromagram[iChroma].values[i]; max@1: } max@1: returnVector.push_back(fwQchromagram); max@1: returnVector.push_back(bwQchromagram); Chris@30: return returnVector; max@1: } max@1: matthiasm@43: matthiasm@43: max@1: /* -------------------------------- */ max@1: /* ------ Support Functions ------ */ max@1: /* -------------------------------- */ max@1: max@1: // one-dimesion median filter Chris@56: vec medfilt1(vec v, int medfilt_length) max@1: { matthiasm@46: // TODO: check if this works with odd and even medfilt_length !!! max@1: int halfWin = medfilt_length/2; max@1: max@1: // result vector Chris@56: vec res = zeros(v.size()); max@1: max@1: // padding Chris@56: vec padV = zeros(v.size()+medfilt_length-1); max@1: Chris@37: for (int i=medfilt_length/2; i < medfilt_length/2+(int)v.size(); ++ i) max@1: { max@1: padV(i) = v(i-medfilt_length/2); matthiasm@46: } matthiasm@46: matthiasm@46: // the above loop leaves the boundaries at 0, matthiasm@46: // the two loops below fill them with the start or end values of v at start and end matthiasm@46: for (int i = 0; i < halfWin; ++i) padV(i) = v(0); matthiasm@46: for (int i = halfWin+(int)v.size(); i < (int)v.size()+2*halfWin; ++i) padV(i) = v(v.size()-1); matthiasm@46: matthiasm@46: max@1: max@1: // Median filter Chris@56: vec win = zeros(medfilt_length); max@1: Chris@37: for (int i=0; i < (int)v.size(); ++i) max@1: { max@1: win = padV.subvec(i,i+halfWin*2); max@1: win = sort(win); max@1: res(i) = win(halfWin); max@1: } max@1: max@1: return res; max@1: } max@1: max@1: max@1: // Quantile Chris@56: double quantile(vec v, double p) max@1: { Chris@56: vec sortV = sort(v); max@1: int n = sortV.size(); Chris@56: vec x = zeros(n+2); Chris@56: vec y = zeros(n+2); max@1: max@1: x(0) = 0; max@1: x(n+1) = 100; max@1: Chris@21: for (int i=1; i=p*100); max@1: max@1: // Interpolation max@1: double x1 = x(x2index(0)-1); max@1: double x2 = x(x2index(0)); max@1: double y1 = y(x2index(0)-1); max@1: double y2 = y(x2index(0)); max@1: max@1: double res = (y2-y1)/(x2-x1)*(p*100-x1)+y1; max@1: max@1: return res; max@1: } max@1: max@1: // Max Filtering Chris@56: mat maxfilt1(mat inmat, int len) max@1: { Chris@56: mat outmat = inmat; max@1: Chris@37: for (int i=0; i < (int)inmat.n_rows; ++i) max@1: { Chris@56: if (sum(inmat.row(i)) > 0) max@1: { max@1: // Take a window of rows max@1: int startWin; max@1: int endWin; max@1: max@1: if (0 > i-len) max@1: startWin = 0; max@1: else max@1: startWin = i-len; max@1: Chris@37: if ((int)inmat.n_rows-1 < i+len-1) max@1: endWin = inmat.n_rows-1; max@1: else max@1: endWin = i+len-1; max@1: Chris@56: outmat(i,span::all) = Chris@56: max(inmat(span(startWin,endWin),span::all)); max@1: } max@1: } max@1: max@1: return outmat; Chris@56: max@1: } max@1: max@1: // Null Parts Chris@56: Part nullpart(vector parts, vec barline) max@1: { Chris@56: uvec nullindices = ones(barline.size()); Chris@37: for (int iPart=0; iPart<(int)parts.size(); ++iPart) max@1: { Chris@21: //for (int iIndex=0; iIndex < parts[0].indices.size(); ++iIndex) Chris@37: for (int iIndex=0; iIndex < (int)parts[iPart].indices.size(); ++iIndex) Chris@21: for (int i=0; i 0); max@1: Chris@37: for (int i=0; i<(int)q.size();++i) max@1: newPart.indices.push_back(q(i)); max@7: max@1: newPart.letter = '-'; max@1: newPart.value = 0; max@1: newPart.level = 0; max@1: max@1: return newPart; max@1: } max@1: max@1: max@1: // Merge Nulls max@1: void mergenulls(vector &parts) max@1: { Chris@76: /* Chris@76: cerr << "Segmentino: mergenulls: before: "<< endl; Chris@76: for (int iPart=0; iPart<(int)parts.size(); ++iPart) { Chris@76: cerr << parts[iPart].letter << ": "; Chris@76: for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) { Chris@76: cerr << parts[iPart].indices[iIndex]; Chris@76: if (iIndex+1 < (int)parts[iPart].indices.size()) { Chris@76: cerr << ", "; Chris@76: } Chris@76: } Chris@76: cerr << endl; Chris@76: } Chris@76: */ Chris@37: for (int iPart=0; iPart<(int)parts.size(); ++iPart) max@1: { max@1: max@1: vector newVectorPart; max@1: max@1: if (parts[iPart].letter.compare("-")==0) max@1: { max@1: sort (parts[iPart].indices.begin(), parts[iPart].indices.end()); Chris@21: int newpartind = -1; max@1: max@1: vector indices; max@1: indices.push_back(-2); max@1: Chris@37: for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) max@1: indices.push_back(parts[iPart].indices[iIndex]); max@1: Chris@37: for (int iInd=1; iInd < (int)indices.size(); ++iInd) max@1: { max@1: if (indices[iInd] - indices[iInd-1] > 1) max@1: { max@1: newpartind++; max@1: max@1: Part newPart; matthiasm@46: newPart.letter = 'N'; max@1: std::stringstream out; max@1: out << newpartind+1; max@1: newPart.letter.append(out.str()); matthiasm@44: // newPart.value = 20+newpartind+1; matthiasm@44: newPart.value = 0; max@1: newPart.n = 1; max@1: newPart.indices.push_back(indices[iInd]); max@1: newPart.level = 0; max@1: max@1: newVectorPart.push_back(newPart); max@1: } max@1: else max@1: { max@1: newVectorPart[newpartind].n = newVectorPart[newpartind].n+1; max@1: } max@1: } Chris@76: parts.erase (parts.begin() + iPart); max@1: Chris@37: for (int i=0; i<(int)newVectorPart.size(); ++i) max@1: parts.push_back(newVectorPart[i]); Chris@76: Chris@76: break; max@1: } max@1: } Chris@76: /* Chris@76: cerr << "Segmentino: mergenulls: after: "<< endl; Chris@76: for (int iPart=0; iPart<(int)parts.size(); ++iPart) { Chris@76: cerr << parts[iPart].letter << ": "; Chris@76: for (int iIndex=0; iIndex<(int)parts[iPart].indices.size(); ++iIndex) { Chris@76: cerr << parts[iPart].indices[iIndex]; Chris@76: if (iIndex+1 < (int)parts[iPart].indices.size()) { Chris@76: cerr << ", "; Chris@76: } Chris@76: } Chris@76: cerr << endl; Chris@76: } Chris@76: */ max@1: } max@1: max@1: /* ------ Segmentation ------ */ max@1: Chris@19: vector songSegment(Vamp::Plugin::FeatureList quantisedChromagram) max@1: { max@1: max@1: max@1: /* ------ Parameters ------ */ max@1: double thresh_beat = 0.85; max@1: double thresh_seg = 0.80; matthiasm@46: int medfilt_length = 5; max@1: int minlength = 28; matthiasm@46: int maxlength = 2*128; max@1: double quantilePerc = 0.1; max@1: /* ------------------------ */ max@1: max@1: max@1: // Collect Info Chris@19: int nBeat = quantisedChromagram.size(); // Number of feature vector Chris@19: int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector max@1: Chris@27: if (nBeat < minlength) { Chris@27: // return a single part Chris@27: vector parts; Chris@27: Part newPart; Chris@27: newPart.n = 1; Chris@27: newPart.indices.push_back(0); Chris@27: newPart.letter = "n1"; Chris@27: newPart.value = 20; Chris@27: newPart.level = 0; Chris@27: parts.push_back(newPart); Chris@27: return parts; Chris@27: } Chris@27: Chris@56: irowvec timeStamp = zeros(1,nBeat); // Vector of Time Stamps max@1: Chris@22: // Save time stamp as a Vector Chris@19: if (quantisedChromagram[0].hasTimestamp) max@1: { Chris@21: for (int i = 0; i < nBeat; ++ i) Chris@19: timeStamp[i] = quantisedChromagram[i].timestamp.nsec; max@1: } max@1: max@1: max@1: // Build a ObservationTOFeatures Matrix Chris@56: mat featVal = zeros(nBeat,nFeatValues/2); max@1: Chris@21: for (int i = 0; i < nBeat; ++ i) Chris@21: for (int j = 0; j < nFeatValues/2; ++ j) max@1: { matthiasm@44: featVal(i,j) = 0.8 * quantisedChromagram[i].values[j] + quantisedChromagram[i].values[j+12]; // bass attenuated max@1: } max@1: max@1: // Set to arbitrary value to feature vectors with low std Chris@56: mat a = stddev(featVal,1,1); max@1: matthiasm@44: // Feature Correlation Matrix Chris@56: mat simmat0 = 1-cor(trans(featVal)); max@1: max@1: Chris@21: for (int i = 0; i < nBeat; ++ i) max@1: { max@1: if (a(i)<0.000001) max@1: { max@1: featVal(i,1) = 1000; // arbitrary max@1: Chris@21: for (int j = 0; j < nFeatValues/2; ++j) max@1: { max@1: simmat0(i,j) = 1; max@1: simmat0(j,i) = 1; max@1: } max@1: } max@1: } max@1: Chris@56: mat simmat = 1-simmat0/2; max@1: max@1: // -------- To delate when the proble with the add of beat will be solved ------- matthiasm@45: for (int i = 0; i < nBeat; ++ i) matthiasm@45: for (int j = 0; j < nBeat; ++ j) matthiasm@45: if (!std::isfinite(simmat(i,j))) matthiasm@45: simmat(i,j)=0; max@1: // ------------------------------------------------------------------------------ max@1: max@1: // Median Filtering applied to the Correlation Matrix max@1: // The median filter is for each diagonal of the Matrix Chris@56: mat median_simmat = zeros(nBeat,nBeat); max@1: Chris@21: for (int i = 0; i < nBeat; ++ i) max@1: { Chris@56: vec temp = medfilt1(simmat.diag(i),medfilt_length); max@1: median_simmat.diag(i) = temp; max@1: median_simmat.diag(-i) = temp; max@1: } max@1: Chris@21: for (int i = 0; i < nBeat; ++ i) Chris@21: for (int j = 0; j < nBeat; ++ j) max@1: if (!std::isfinite(median_simmat(i,j))) max@1: median_simmat(i,j) = 0; max@1: max@1: // -------------- NOT CONVERTED ------------------------------------- max@1: // if param.seg.standardise max@1: // med_median_simmat = repmat(median(median_simmat),nBeat,1); max@1: // std_median_simmat = repmat(std(median_simmat),nBeat,1); max@1: // median_simmat = (median_simmat - med_median_simmat) ./ std_median_simmat; max@1: // end max@1: // -------------------------------------------------------- max@1: max@1: // Retrieve Bar Bounderies Chris@56: uvec dup = find(median_simmat > thresh_beat); Chris@56: mat potential_duplicates = zeros(nBeat,nBeat); Chris@56: potential_duplicates.elem(dup) = ones(dup.size()); max@1: potential_duplicates = trimatu(potential_duplicates); max@1: Chris@21: int nPartlengths = round((maxlength-minlength)/4)+1; Chris@56: vec partlengths = zeros(nPartlengths); max@1: Chris@21: for (int i = 0; i < nPartlengths; ++ i) matthiasm@46: partlengths(i) = (i*4) + minlength; max@1: max@1: // initialise arrays Chris@56: cube simArray = zeros(nBeat,nBeat,nPartlengths); Chris@56: cube decisionArray2 = zeros(nBeat,nBeat,nPartlengths); max@1: matthiasm@46: for (int iLength = 0; iLength < nPartlengths; ++ iLength) matthiasm@46: // for (int iLength = 0; iLength < 20; ++ iLength) max@1: { Chris@21: int len = partlengths(iLength); Chris@21: int nUsedBeat = nBeat - len + 1; // number of potential rep beginnings: they can't overlap at the end of the song Chris@33: Chris@33: if (nUsedBeat < 1) continue; max@1: Chris@21: for (int iBeat = 0; iBeat < nUsedBeat; ++ iBeat) // looping over all columns (arbitrarily chosen columns) max@1: { Chris@56: uvec help2 = find(potential_duplicates(span(0,nUsedBeat-1),iBeat)==1); max@1: Chris@37: for (int i=0; i < (int)help2.size(); ++i) max@1: { max@1: max@1: // measure how well two length len segments go together max@1: int kBeat = help2(i); Chris@56: vec distrib = median_simmat(span(iBeat,iBeat+len-1), span(kBeat,kBeat+len-1)).diag(0); max@1: simArray(iBeat,kBeat,iLength) = quantile(distrib,quantilePerc); max@1: } max@1: } max@1: Chris@56: mat tempM = simArray(span(0,nUsedBeat-1), span(0,nUsedBeat-1), span(iLength,iLength)); Chris@56: simArray.slice(iLength)(span(0,nUsedBeat-1), span(0,nUsedBeat-1)) = tempM + trans(tempM) - (eye(nUsedBeat,nUsedBeat)%tempM); max@1: max@1: // convolution Chris@56: vec K = zeros(3); max@1: K << 0.01 << 0.98 << 0.01; max@1: max@1: Chris@37: for (int i=0; i < (int)simArray.n_rows; ++i) max@1: { Chris@56: rowvec t = conv((rowvec)simArray.slice(iLength).row(i),K); Chris@56: simArray.slice(iLength)(i, span::all) = t.subvec(1,t.size()-2); max@1: } max@1: max@1: // take only over-average bars that do not overlap max@1: Chris@56: mat temp = zeros(simArray.n_rows, simArray.n_cols); Chris@56: temp(span::all, span(0,nUsedBeat-1)) = simArray.slice(iLength)(span::all, span(0,nUsedBeat-1)); max@1: Chris@37: for (int i=0; i < (int)temp.n_rows; ++i) Chris@37: for (int j=0; j < nUsedBeat; ++j) max@1: if (temp(i,j) < thresh_seg) max@1: temp(i,j) = 0; max@1: max@1: decisionArray2.slice(iLength) = temp; max@1: Chris@56: mat maxMat = maxfilt1(decisionArray2.slice(iLength),len-1); max@1: Chris@37: for (int i=0; i < (int)decisionArray2.n_rows; ++i) Chris@37: for (int j=0; j < (int)decisionArray2.n_cols; ++j) max@1: if (decisionArray2.slice(iLength)(i,j) < maxMat(i,j)) max@1: decisionArray2.slice(iLength)(i,j) = 0; max@1: Chris@56: decisionArray2.slice(iLength) = decisionArray2.slice(iLength) % trans(decisionArray2.slice(iLength)); max@1: Chris@37: for (int i=0; i < (int)simArray.n_rows; ++i) Chris@37: for (int j=0; j < (int)simArray.n_cols; ++j) max@1: if (simArray.slice(iLength)(i,j) < thresh_seg) max@1: potential_duplicates(i,j) = 0; max@1: } max@1: max@1: // Milk the data max@1: Chris@56: mat bestval; max@1: Chris@21: for (int iLength=0; iLength(decisionArray2.n_rows,decisionArray2.n_cols); max@1: Chris@37: for (int rows=0; rows < (int)decisionArray2.n_rows; ++rows) Chris@37: for (int cols=0; cols < (int)decisionArray2.n_cols; ++cols) max@1: if (decisionArray2.slice(iLength)(rows,cols) > 0) max@1: temp(rows,cols) = 1; max@1: Chris@56: vec currLogicSum = sum(temp,1); max@1: Chris@37: for (int iBeat=0; iBeat < nBeat; ++iBeat) max@1: if (currLogicSum(iBeat) > 1) max@1: { Chris@56: vec t = decisionArray2.slice(iLength)(span::all,iBeat); max@1: double currSum = sum(t); max@1: Chris@21: int count = 0; Chris@37: for (int i=0; i < (int)t.size(); ++i) max@1: if (t(i)>0) max@1: count++; max@1: max@1: currSum = (currSum/count)/2; max@1: Chris@56: rowvec t1; max@1: t1 << (currLogicSum(iBeat)-1) * partlengths(iLength) << currSum << iLength << iBeat << currLogicSum(iBeat); max@1: max@1: bestval = join_cols(bestval,t1); max@1: } max@1: } max@1: max@1: // Definition of the resulting vector max@1: vector parts; max@1: max@1: // make a table of all valid sets of parts max@1: max@1: char partletters[] = {'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O','P','Q','R','S'}; Chris@21: int partvalues[] = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19}; Chris@56: vec valid_sets = ones(bestval.n_rows); max@1: max@1: if (!bestval.is_empty()) max@1: { max@1: max@1: // In questo punto viene introdotto un errore alla 3 cifra decimale max@1: Chris@56: colvec t = zeros(bestval.n_rows); Chris@37: for (int i=0; i < (int)bestval.n_rows; ++i) max@1: { max@1: t(i) = bestval(i,1)*2; max@1: } max@1: max@1: double m = t.max(); max@1: Chris@56: bestval(span::all,1) = bestval(span::all,1) / m; Chris@56: bestval(span::all,0) = bestval(span::all,0) + bestval(span::all,1); max@1: Chris@56: mat bestval2; Chris@37: for (int i=0; i < (int)bestval.n_cols; ++i) max@1: if (i!=1) max@1: bestval2 = join_rows(bestval2,bestval.col(i)); max@1: Chris@21: for (int kSeg=0; kSeg<6; ++kSeg) max@1: { Chris@56: mat currbestvals = zeros(bestval2.n_rows, bestval2.n_cols); Chris@37: for (int i=0; i < (int)bestval2.n_rows; ++i) Chris@37: for (int j=0; j < (int)bestval2.n_cols; ++j) max@1: if (valid_sets(i)) max@1: currbestvals(i,j) = bestval2(i,j); max@1: Chris@56: vec t1 = currbestvals.col(0); max@1: double ma; Chris@56: uword maIdx; max@1: ma = t1.max(maIdx); max@6: max@6: if ((maIdx == 0)&&(ma == 0)) max@6: break; max@1: Chris@28: int bestLength = lrint(partlengths(currbestvals(maIdx,1))); Chris@56: rowvec bestIndices = decisionArray2.slice(currbestvals(maIdx,1))(currbestvals(maIdx,2), span::all); max@1: Chris@56: rowvec bestIndicesMap = zeros(bestIndices.size()); Chris@37: for (int i=0; i < (int)bestIndices.size(); ++i) max@1: if (bestIndices(i)>0) max@1: bestIndicesMap(i) = 1; max@1: Chris@56: rowvec mask = zeros(bestLength*2-1); Chris@21: for (int i=0; i 0); max@1: Chris@37: for (int i=0; i < (int)q1.size();++i) max@1: newPart.indices.push_back(q1(i)); max@1: max@1: newPart.letter = partletters[kSeg]; max@1: newPart.value = partvalues[kSeg]; max@1: newPart.level = kSeg+1; max@1: parts.push_back(newPart); max@1: Chris@56: uvec q2 = find(valid_sets==1); max@1: Chris@37: for (int i=0; i < (int)q2.size(); ++i) max@1: { Chris@21: int iSet = q2(i); Chris@21: int s = partlengths(bestval2(iSet,1)); max@1: Chris@56: rowvec mask1 = zeros(s*2-1); Chris@21: for (int i=0; i(Ind.size()); Chris@37: for (int i=0; i < (int)Ind.size(); ++i) max@1: if (Ind(i)>0) max@1: IndMap(i) = 2; max@1: Chris@56: rowvec t3 = conv(IndMap,mask1); Chris@56: rowvec currislands = t3.subvec(mask1.size()/2,t3.size()-1-mask1.size()/2); Chris@56: rowvec islandsdMult = currislands%island; max@6: Chris@56: uvec islandsIndex = find(islandsdMult > 0); max@1: max@6: if (islandsIndex.size() > 0) max@1: valid_sets(iSet) = 0; max@1: } max@1: } max@1: } max@1: else max@1: { max@1: Part newPart; max@1: newPart.n = nBeat; Chris@33: newPart.indices.push_back(0); max@1: newPart.letter = 'A'; max@1: newPart.value = 1; max@1: newPart.level = 1; max@1: parts.push_back(newPart); max@1: } max@6: Chris@56: vec bar = linspace(1,nBeat,nBeat); max@1: Part np = nullpart(parts,bar); max@7: max@1: parts.push_back(np); max@1: max@1: // -------------- NOT CONVERTED ------------------------------------- max@1: // if param.seg.editor max@1: // [pa, ta] = partarray(parts); max@1: // parts = editorssearch(pa, ta, parts); max@1: // parts = [parts, nullpart(parts,1:nBeat)]; max@1: // end max@1: // ------------------------------------------------------------------ max@1: max@1: max@1: mergenulls(parts); max@1: max@1: max@1: // -------------- NOT CONVERTED ------------------------------------- max@1: // if param.seg.editor max@1: // [pa, ta] = partarray(parts); max@1: // parts = editorssearch(pa, ta, parts); max@1: // parts = [parts, nullpart(parts,1:nBeat)]; max@1: // end max@1: // ------------------------------------------------------------------ max@1: max@1: return parts; max@1: } max@1: max@1: max@1: Chris@19: void songSegmentChroma(Vamp::Plugin::FeatureList quantisedChromagram, vector &parts) max@1: { max@1: // Collect Info Chris@19: int nBeat = quantisedChromagram.size(); // Number of feature vector Chris@19: int nFeatValues = quantisedChromagram[0].values.size(); // Number of values for each feature vector max@1: Chris@56: mat synchTreble = zeros(nBeat,nFeatValues/2); max@1: Chris@21: for (int i = 0; i < nBeat; ++ i) Chris@21: for (int j = 0; j < nFeatValues/2; ++ j) max@1: { Chris@19: synchTreble(i,j) = quantisedChromagram[i].values[j]; max@1: } max@1: Chris@56: mat synchBass = zeros(nBeat,nFeatValues/2); max@1: Chris@21: for (int i = 0; i < nBeat; ++ i) Chris@21: for (int j = 0; j < nFeatValues/2; ++ j) max@1: { Chris@19: synchBass(i,j) = quantisedChromagram[i].values[j+12]; max@1: } max@1: max@1: // Process max@1: Chris@56: mat segTreble = zeros(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2); Chris@56: mat segBass = zeros(quantisedChromagram.size(),quantisedChromagram[0].values.size()/2); max@1: Chris@37: for (int iPart=0; iPart < (int)parts.size(); ++iPart) max@1: { max@1: parts[iPart].nInd = parts[iPart].indices.size(); max@1: Chris@21: for (int kOccur=0; kOccur songSegmentIntegration(vector &parts) max@1: { max@1: // Break up parts (every part will have one instance) max@1: vector newPartVector; max@1: vector partindices; max@1: Chris@37: for (int iPart=0; iPart < (int)parts.size(); ++iPart) max@1: { max@1: parts[iPart].nInd = parts[iPart].indices.size(); Chris@21: for (int iInstance=0; iInstance parts; max@1: vector finalParts; max@1: Chris@19: parts = songSegment(quantisedChromagram); Chris@19: songSegmentChroma(quantisedChromagram,parts); max@7: max@1: finalParts = songSegmentIntegration(parts); max@1: max@1: max@1: // TEMP ---- Chris@21: /*for (int i=0;i values; max@1: vector letters; max@1: Chris@37: for (int iPart=0; iPart < (int)finalParts.size()-1; ++iPart) max@1: { Chris@21: int iInstance=0; max@1: seg.hasTimestamp = true; max@1: max@1: int ind = finalParts[iPart].indices[iInstance]; max@1: int ind1 = finalParts[iPart+1].indices[iInstance]; max@1: Chris@19: seg.timestamp = quantisedChromagram[ind].timestamp; max@1: seg.hasDuration = true; Chris@19: seg.duration = quantisedChromagram[ind1].timestamp-quantisedChromagram[ind].timestamp; max@1: seg.values.clear(); max@1: seg.values.push_back(finalParts[iPart].value); max@1: seg.label = finalParts[iPart].letter; max@1: max@1: results.push_back(seg); max@1: } max@1: Chris@37: if (finalParts.size() > 0) { Chris@37: int ind = finalParts[finalParts.size()-1].indices[0]; Chris@37: seg.hasTimestamp = true; Chris@37: seg.timestamp = quantisedChromagram[ind].timestamp; Chris@37: seg.hasDuration = true; Chris@37: seg.duration = quantisedChromagram[quantisedChromagram.size()-1].timestamp-quantisedChromagram[ind].timestamp; Chris@37: seg.values.clear(); Chris@37: seg.values.push_back(finalParts[finalParts.size()-1].value); Chris@37: seg.label = finalParts[finalParts.size()-1].letter; max@1: Chris@37: results.push_back(seg); Chris@37: } max@1: max@1: return results; max@1: } max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: max@1: