Mercurial > hg > qm-vamp-plugins
diff plugins/AdaptiveSpectrogram.cpp @ 92:3602e755b696
* Add the Adaptive Spectrogram plugin -- but it isn't working correctly
yet. Also, when it does work, it will need to be refactored out into
the qm-dsp library
author | Chris Cannam <c.cannam@qmul.ac.uk> |
---|---|
date | Fri, 27 Feb 2009 10:45:10 +0000 |
parents | |
children | 385bec9df059 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plugins/AdaptiveSpectrogram.cpp Fri Feb 27 10:45:10 2009 +0000 @@ -0,0 +1,635 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +/* + QM Vamp Plugin Set + + Centre for Digital Music, Queen Mary, University of London. + All rights reserved. +*/ + +#include "AdaptiveSpectrogram.h" + +#include <cstdlib> +#include <cstring> + +#include <iostream> + +#include <dsp/transforms/FFT.h> + +using std::string; +using std::vector; +using std::cerr; +using std::endl; + +using Vamp::RealTime; + +AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) : + Plugin(inputSampleRate), + m_w(9), + m_n(2) +{ +} + +AdaptiveSpectrogram::~AdaptiveSpectrogram() +{ +} + +string +AdaptiveSpectrogram::getIdentifier() const +{ + return "adaptivespectrogram"; +} + +string +AdaptiveSpectrogram::getName() const +{ + return "Adaptive Spectrogram"; +} + +string +AdaptiveSpectrogram::getDescription() const +{ + return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions"; +} + +string +AdaptiveSpectrogram::getMaker() const +{ + return "Queen Mary, University of London"; +} + +int +AdaptiveSpectrogram::getPluginVersion() const +{ + return 1; +} + +string +AdaptiveSpectrogram::getCopyright() const +{ + return "Plugin by Wen Xue and Chris Cannam. Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved"; +} + +size_t +AdaptiveSpectrogram::getPreferredStepSize() const +{ + return ((2 << m_w) << m_n) / 2; +} + +size_t +AdaptiveSpectrogram::getPreferredBlockSize() const +{ + return (2 << m_w) << m_n; +} + +bool +AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize) +{ + if (channels < getMinChannelCount() || + channels > getMaxChannelCount()) return false; + + return true; +} + +void +AdaptiveSpectrogram::reset() +{ + +} + +AdaptiveSpectrogram::ParameterList +AdaptiveSpectrogram::getParameterDescriptors() const +{ + ParameterList list; + + ParameterDescriptor desc; + desc.identifier = "n"; + desc.name = "Number of resolutions"; + desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified"; + desc.unit = ""; + desc.minValue = 1; + desc.maxValue = 10; + desc.defaultValue = 3; + desc.isQuantized = true; + desc.quantizeStep = 1; + list.push_back(desc); + + ParameterDescriptor desc2; + desc2.identifier = "w"; + desc2.name = "Smallest resolution"; + desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions"; + desc2.unit = ""; + desc2.minValue = 1; + desc2.maxValue = 14; + desc2.defaultValue = 10; + desc2.isQuantized = true; + desc2.quantizeStep = 1; + // I am so lazy + desc2.valueNames.push_back("2"); + desc2.valueNames.push_back("4"); + desc2.valueNames.push_back("8"); + desc2.valueNames.push_back("16"); + desc2.valueNames.push_back("32"); + desc2.valueNames.push_back("64"); + desc2.valueNames.push_back("128"); + desc2.valueNames.push_back("256"); + desc2.valueNames.push_back("512"); + desc2.valueNames.push_back("1024"); + desc2.valueNames.push_back("2048"); + desc2.valueNames.push_back("4096"); + desc2.valueNames.push_back("8192"); + desc2.valueNames.push_back("16384"); + list.push_back(desc2); + + return list; +} + +float +AdaptiveSpectrogram::getParameter(std::string id) const +{ + if (id == "n") return m_n+1; + else if (id == "w") return m_w+1; + return 0.f; +} + +void +AdaptiveSpectrogram::setParameter(std::string id, float value) +{ + if (id == "n") { + int n = lrintf(value); + if (n >= 1 && n <= 10) m_n = n-1; + } else if (id == "w") { + int w = lrintf(value); + if (w >= 1 && w <= 14) m_w = w-1; + } +} + +AdaptiveSpectrogram::OutputList +AdaptiveSpectrogram::getOutputDescriptors() const +{ + OutputList list; + + OutputDescriptor d; + d.identifier = "output"; + d.name = "Output"; + d.description = "The output of the plugin"; + d.unit = ""; + d.hasFixedBinCount = true; + d.binCount = ((2 << m_w) << m_n) / 2; + d.hasKnownExtents = false; + d.isQuantized = false; + d.sampleType = OutputDescriptor::FixedSampleRate; + d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2); + d.hasDuration = false; + list.push_back(d); + + return list; +} + +AdaptiveSpectrogram::FeatureSet +AdaptiveSpectrogram::getRemainingFeatures() +{ + FeatureSet fs; + return fs; +} + +AdaptiveSpectrogram::FeatureSet +AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts) +{ + FeatureSet fs; + + int wid = (2 << m_w), WID = ((2 << m_w) << m_n); + int Res = log2(WID/wid)+1; + double ***specs = new double **[Res]; + int Wid = WID; + int wi = 0; + + cerr << "wid = " << wid << ", WID = " << WID << endl; + + double *tmpin = new double[WID]; + double *tmprout = new double[WID]; + double *tmpiout = new double[WID]; + + while (Wid >= wid) { + specs[wi] = new double *[WID/Wid]; + for (int i = 0; i < WID/Wid; ++i) { + specs[wi][i] = new double[Wid/2]; + int origin = WID/4 - Wid/4; // for 50% overlap + for (int j = 0; j < Wid; ++j) { + double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid); + tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul; + } + FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout); + for (int j = 0; j < Wid/2; ++j) { + double mag = + tmprout[j] * tmprout[j] + + tmpiout[j] * tmpiout[j]; + specs[wi][i][j] = sqrt(mag) / Wid; + } + } + Wid /= 2; + ++wi; + } + + int *spl = new int[WID/2]; + double *spec = new double[WID/2]; + + // This prefill makes it easy to see which elements are actually + // set by the MixSpectrogramBlock2 call. Turns out that, with + // 1024, 2048 and 4096 as our widths, the spl array has elements + // 0-4094 (incl) filled in and the spec array has elements 0-4095 + + for (int i = 0; i < WID/2; ++i) { + spl[i] = i; + spec[i] = i; + } + + MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false); + + Wid = WID; + wi = 0; + while (Wid >= wid) { + for (int i = 0; i < WID/Wid; ++i) { + delete[] specs[wi][i]; + } + delete[] specs[wi]; + Wid /= 2; + ++wi; + } + delete[] specs; + + std::cerr << "Results at " << ts << ":" << std::endl; +/* for (int i = 0; i < WID/2; ++i) { + if (spl[i] == i || spec[i] == i) { + std::cerr << "\n***\n"; + } + std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " "; + } + std::cerr << std::endl; +*/ + vector<vector<float> > rmat(WID/wid); + for (int i = 0; i < WID/wid; ++i) { + rmat[i] = vector<float>(WID/2); + } + + int y = 0, h = WID/2; + int x = 0, w = WID/wid; + unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID); + + delete[] spec; + delete[] spl; + + for (int i = 0; i < rmat.size(); ++i) { + Feature f; + f.hasTimestamp = false; + f.values = rmat[i]; + fs[0].push_back(f); + } + +/* + if (m_stepSize == 0) { + cerr << "ERROR: AdaptiveSpectrogram::process: " + << "AdaptiveSpectrogram has not been initialised" + << endl; + return fs; + } +*/ + return fs; +} + +void +AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat, + int x, int y, int w, int h, + int *spl, + double *spec, int sz, + int res + ) +{ + + cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h + << ", sz = " << sz << ", *spl = " << *spl << ", *spec = " << *spec << ", res = " << res << endl; + + if (sz <= 1) { + + for (int i = 0; i < w; ++i) { + for (int j = 0; j < h; ++j) { +// rmat[x+i][y+j] = (off ? 0 : *spec); + if (rmat[x+i][y+j] != 0) { + cerr << "WARNING: Overwriting value " << rmat[x+i][y+j] + << " with " << res + i + j << " at " << x+i << "," << y+j << endl; + } +// cerr << "[" << x+i << "][" << y+j << "] <= " << res+i+j << endl; + rmat[x+i][y+j] = *spec; + } + } + +// cerr << " (done)" << endl; + return; + } +// cerr << endl; + + if (*spl == 0) { + + unpackResultMatrix(rmat, + x, y, + w, h/2, + spl + 1, + spec, + sz/2, + res); + + unpackResultMatrix(rmat, + x, y + h/2, + w, h/2, + spl + sz/2, + spec + sz/2, + sz/2, + res); + + } else if (*spl == 1) { + + unpackResultMatrix(rmat, + x, y, + w/2, h, + spl + 1, + spec, + sz/2, + res/2); + + unpackResultMatrix(rmat, + x + w/2, y, + w/2, h, + spl + sz/2, + spec + sz/2, + sz/2, + res/2); + + } else { + cerr << "ERROR: *spl = " << *spl << endl; + } +} + +//spl[Y-1] +//Specs[R0][x0:x0+x-1][Y0:Y0+Y-1] +//Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1] +//... +//Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1] +//N=WID/wid + +/** + * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl. + */ +double +AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0, + int x0, int Y0, int N, double& ene) +{ + double ent = 0; + + if (Y > N) { + + spl[0] = 0; + double ene1, ene2; + + ent += DoCutSpectrogramBlock2 + (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1); + + ent += DoCutSpectrogramBlock2 + (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2); + + ene = ene1+ene2; + + } else if (N == 1) { + + double tmp = Specs[R0][x0][Y0]; + ene = tmp; + ent = xlogx(tmp); + + } else { + // Y == N, the square case + + double enel, ener, enet, eneb, entl, entr, entt, entb; + int* tmpspl = new int[Y]; + + entl = DoCutSpectrogramBlock2 + (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel); + + entr = DoCutSpectrogramBlock2 + (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener); + + entb = DoCutSpectrogramBlock2 + (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb); + + entt = DoCutSpectrogramBlock2 + (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet); + + double + ene0 = enet + eneb, + ene1 = enel + ener, + ent0 = entt + entb, + ent1 = entl + entr; + + // normalize + + double eneres = 1 - (ene0+ene1)/2, norment0, norment1; + double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres); + + // quasi-global normalization + + norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres); + norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres); + + // local normalization + + if (norment1 < norment0) { + spl[0] = 0; + ent = ent0, ene = ene0; + memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2)); + } else { + spl[0] = 1; + ent = ent1, ene = ene1; + } + } + return ent; +} + +/** + * DoMixSpectrogramBlock2 collects values from the multiple + * spectrograms Specs into a linear array Spec. + */ +double +AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y, + int R0, int x0, int Y0, bool normmix, int res, + double* e) +{ + if (Y == 1) { + + Spec[0] = Specs[R0][x0][Y0]*e[0]; + + } else { + + double le[32]; + + if (normmix && Y < (1<<res)) { + + for (int i = 0, j = 1, k = Y; + i < res; + i++, j *= 2, k /= 2) { + + double lle = 0; + + for (int fr = 0; fr < j; fr++) { + for (int n = 0; n < k; n++) { + lle += + Specs[i+R0][x0+fr][Y0+n] * + Specs[i+R0][x0+fr][Y0+n]; + } + } + + lle = sqrt(lle)*e[i]; + + if (i == 0) { + le[0] = lle; + } else if (lle > le[0]*2) { + le[i] = e[i]*le[0]*2/lle; + } else { + le[i] = e[i]; + } + } + + le[0] = e[0]; + + } else { + + memcpy(le, e, sizeof(double)*res); + } + + if (spl[0] == 0) { + + int newres; + if (Y >= (1<<res)) newres = res; + else newres = res-1; + + DoMixSpectrogramBlock2 + (&spl[1], Spec, Specs, Y/2, R0, x0, Y0, + normmix, newres, le); + + DoMixSpectrogramBlock2 + (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, + normmix, newres, le); + + } else { + + DoMixSpectrogramBlock2 + (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2, + normmix, res-1, &le[1]); + + DoMixSpectrogramBlock2 + (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2, + normmix, res-1, &le[1]); + } + } + + return 0; +} + +/** + * MixSpectrogramBlock2 calls the two Do...() to do the real work. + * + * At this point: + * spl is... what? the returned "cutting", organised how? + * Spec is... what? the returned spectrogram, organised how? + * Specs is an array of input spectrograms + * WID is the maximum window size + * wid is the minimum window size + * normmix is... what? + */ +double +AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int + WID, int wid, bool normmix) +{ + double ene[32]; + + // find the total energy and normalize + + for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) { + + double lene = 0; + + for (int fr = 0; fr < Fr; fr++) { + for (int k = 0; k < Wid; k++) { + lene += Specs[i][fr][k]*Specs[i][fr][k]; + } + } + + ene[i] = lene; + + if (lene != 0) { + double ilene = 1.0/lene; + for (int fr = 0; fr < Fr; fr++) { + for (int k = 0; k < Wid; k++) { + Specs[i][fr][k] = Specs[i][fr][k]*Specs[i][fr][k]*ilene; + } + } + } + } + + + double result = DoCutSpectrogramBlock2 + (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]); + + // de-normalize + + for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) { + double lene = ene[i]; + if (lene != 0) { + for (int fr = 0; fr < Fr; fr++) { + for (int k = 0; k < Wid; k++) { + Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene); + } + } + } + } + + double e[32]; + for (int i = 0; i < 32; i++) e[i] = 1; + + DoMixSpectrogramBlock2 + (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e); + + return result; +} + +/** + * MixSpectrogram2 does the work for Fr frames (the largest frame), + * which basically calls MixSpectrogramBlock2 Fr times. + * + * the 3-D array Specs is the multiple spectrograms calculated with + * window sizes between wid and WID, Specs[0] is the 0th spectrogram, + * etc. + * + * spl and Spec for all frames are returned by MixSpectrogram2, each + * as a 2-D array. + */ +double +AdaptiveSpectrogram::MixSpectrogram2(int** spl, double** Spec, double*** Specs, int Fr, + int WID, int wid, bool norm, bool normmix) +{ + // totally Fr frames of WID samples + // each frame is divided into wid VERTICAL parts + + int Res = log2(WID/wid)+1; + double*** lSpecs = new double**[Res]; + + for (int i = 0; i < Fr; i++) { + + for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) { + lSpecs[j] = &Specs[j][i*nfr]; + } + + MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm); + } + + delete[] lSpecs; + return 0; +} +