Mercurial > hg > qm-vamp-plugins

/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    QM Vamp Plugin Set

    Centre for Digital Music, Queen Mary, University of London.
    All rights reserved.
*/

#include "AdaptiveSpectrogram.h"

#include <cstdlib>
#include <cstring>

#include <iostream>

#include <dsp/transforms/FFT.h>

using std::string;
using std::vector;
using std::cerr;
using std::endl;

using Vamp::RealTime;

AdaptiveSpectrogram::AdaptiveSpectrogram(float inputSampleRate) :
    Plugin(inputSampleRate),
    m_w(9),
    m_n(2)
{
}

AdaptiveSpectrogram::~AdaptiveSpectrogram()
{
}

string
AdaptiveSpectrogram::getIdentifier() const
{
    return "qm-adaptivespectrogram";
}

string
AdaptiveSpectrogram::getName() const
{
    return "Adaptive Spectrogram";
}

string
AdaptiveSpectrogram::getDescription() const
{
    return "Produce an adaptive spectrogram by adaptive selection from spectrograms at multiple resolutions";
}

string
AdaptiveSpectrogram::getMaker() const
{
    return "Queen Mary, University of London";
}

int
AdaptiveSpectrogram::getPluginVersion() const
{
    return 1;
}

string
AdaptiveSpectrogram::getCopyright() const
{
    return "Plugin by Wen Xue and Chris Cannam.  Copyright (c) 2009 Wen Xue and QMUL - All Rights Reserved";
}

size_t
AdaptiveSpectrogram::getPreferredStepSize() const
{
    return ((2 << m_w) << m_n) / 2;
}

size_t
AdaptiveSpectrogram::getPreferredBlockSize() const
{
    return (2 << m_w) << m_n;
}

bool
AdaptiveSpectrogram::initialise(size_t channels, size_t stepSize, size_t blockSize)
{
    if (channels < getMinChannelCount() ||
	channels > getMaxChannelCount()) return false;

    return true;
}

void
AdaptiveSpectrogram::reset()
{

}

AdaptiveSpectrogram::ParameterList
AdaptiveSpectrogram::getParameterDescriptors() const
{
    ParameterList list;

    ParameterDescriptor desc;
    desc.identifier = "n";
    desc.name = "Number of resolutions";
    desc.description = "Number of consecutive powers of two to use as spectrogram resolutions, starting with the minimum resolution specified";
    desc.unit = "";
    desc.minValue = 1;
    desc.maxValue = 10;
    desc.defaultValue = 3;
    desc.isQuantized = true;
    desc.quantizeStep = 1;
    list.push_back(desc);

    ParameterDescriptor desc2;
    desc2.identifier = "w";
    desc2.name = "Smallest resolution";
    desc2.description = "Smallest of the consecutive powers of two to use as spectrogram resolutions";
    desc2.unit = "";
    desc2.minValue = 1;
    desc2.maxValue = 14;
    desc2.defaultValue = 10;
    desc2.isQuantized = true;
    desc2.quantizeStep = 1;
    // I am so lazy
    desc2.valueNames.push_back("2");
    desc2.valueNames.push_back("4");
    desc2.valueNames.push_back("8");
    desc2.valueNames.push_back("16");
    desc2.valueNames.push_back("32");
    desc2.valueNames.push_back("64");
    desc2.valueNames.push_back("128");
    desc2.valueNames.push_back("256");
    desc2.valueNames.push_back("512");
    desc2.valueNames.push_back("1024");
    desc2.valueNames.push_back("2048");
    desc2.valueNames.push_back("4096");
    desc2.valueNames.push_back("8192");
    desc2.valueNames.push_back("16384");
    list.push_back(desc2);

    return list;
}

float
AdaptiveSpectrogram::getParameter(std::string id) const
{
    if (id == "n") return m_n+1;
    else if (id == "w") return m_w+1;
    return 0.f;
}

void
AdaptiveSpectrogram::setParameter(std::string id, float value)
{
    if (id == "n") {
        int n = lrintf(value);
        if (n >= 1 && n <= 10) m_n = n-1;
    } else if (id == "w") {
        int w = lrintf(value);
        if (w >= 1 && w <= 14) m_w = w-1;
    }
}

AdaptiveSpectrogram::OutputList
AdaptiveSpectrogram::getOutputDescriptors() const
{
    OutputList list;

    OutputDescriptor d;
    d.identifier = "output";
    d.name = "Output";
    d.description = "The output of the plugin";
    d.unit = "";
    d.hasFixedBinCount = true;
    d.binCount = ((2 << m_w) << m_n) / 2;
    d.hasKnownExtents = false;
    d.isQuantized = false;
    d.sampleType = OutputDescriptor::FixedSampleRate;
    d.sampleRate = m_inputSampleRate / ((2 << m_w) / 2);
    d.hasDuration = false;
    list.push_back(d);

    return list;
}

AdaptiveSpectrogram::FeatureSet
AdaptiveSpectrogram::getRemainingFeatures()
{
    FeatureSet fs;
    return fs;
}

AdaptiveSpectrogram::FeatureSet
AdaptiveSpectrogram::process(const float *const *inputBuffers, RealTime ts)
{
    FeatureSet fs;

    int wid = (2 << m_w), WID = ((2 << m_w) << m_n);
    int Res = log2(WID/wid)+1;
    double ***specs = new double **[Res];
    int Wid = WID;
    int wi = 0;

    cerr << "wid = " << wid << ", WID = " << WID << endl;

    double *tmpin  = new double[WID];
    double *tmprout = new double[WID];
    double *tmpiout = new double[WID];

    while (Wid >= wid) {
        specs[wi] = new double *[WID/Wid];
        for (int i = 0; i < WID/Wid; ++i) {
            specs[wi][i] = new double[Wid/2];
            int origin = WID/4 - Wid/4; // for 50% overlap
            for (int j = 0; j < Wid; ++j) {
                double mul = 0.50 - 0.50 * cos((2 * M_PI * j) / Wid);
                tmpin[j] = inputBuffers[0][origin + i * Wid/2 + j] * mul;
            }
            FFT::process(Wid, false, tmpin, 0, tmprout, tmpiout);
            for (int j = 0; j < Wid/2; ++j) {
                double mag =
                    tmprout[j] * tmprout[j] +
                    tmpiout[j] * tmpiout[j];
                specs[wi][i][j] = sqrt(mag) / Wid;
            }
        }
        Wid /= 2;
        ++wi;
    }

    int *spl = new int[WID/2];
    double *spec = new double[WID/2];

    // This prefill makes it easy to see which elements are actually
    // set by the MixSpectrogramBlock2 call.  Turns out that, with
    // 1024, 2048 and 4096 as our widths, the spl array has elements
    // 0-4094 (incl) filled in and the spec array has elements 0-4095

    for (int i = 0; i < WID/2; ++i) {
        spl[i] = i;
        spec[i] = i;
    }

    MixSpectrogramBlock2(spl, spec, specs, WID/2, wid/2, false);

    Wid = WID;
    wi = 0;
    while (Wid >= wid) {
        for (int i = 0; i < WID/Wid; ++i) {
            delete[] specs[wi][i];
        }
        delete[] specs[wi];
        Wid /= 2;
        ++wi;
    }
    delete[] specs;

    std::cerr << "Results at " << ts << ":" << std::endl;
/*    for (int i = 0; i < WID/2; ++i) {
        if (spl[i] == i || spec[i] == i) {
            std::cerr << "\n***\n";
        }
        std::cerr << "[" << i << "] " << spl[i] << "," << spec[i] << " ";
    }
    std::cerr << std::endl;
*/
    vector<vector<float> > rmat(WID/wid);
    for (int i = 0; i < WID/wid; ++i) {
        rmat[i] = vector<float>(WID/2);
    }

    int y = 0, h = WID/2;
    int x = 0, w = WID/wid;
    unpackResultMatrix(rmat, x, y, w, h, spl, spec, WID/2, WID);

    delete[] spec;
    delete[] spl;

    for (int i = 0; i < rmat.size(); ++i) {
        Feature f;
        f.hasTimestamp = false;
        f.values = rmat[i];
        fs[0].push_back(f);
    }

/*
    if (m_stepSize == 0) {
	cerr << "ERROR: AdaptiveSpectrogram::process: "
	     << "AdaptiveSpectrogram has not been initialised"
	     << endl;
	return fs;
    }
*/
    return fs;
}

void
AdaptiveSpectrogram::unpackResultMatrix(vector<vector<float> > &rmat,
                                  int x, int y, int w, int h,
                                  int *spl,
                                  double *spec, int sz,
                                  int res
    )
{

    cerr << "x = " << x << ", y = " << y << ", w = " << w << ", h = " << h
         << ", sz = " << sz << ", *spl = " << *spl << ", *spec = " << *spec << ", res = " << res <<  endl;

    if (sz <= 1) {

        for (int i = 0; i < w; ++i) {
            for (int j = 0; j < h; ++j) {
//                rmat[x+i][y+j] = (off ? 0 : *spec);
                if (rmat[x+i][y+j] != 0) {
                    cerr << "WARNING: Overwriting value " << rmat[x+i][y+j]
                         << " with " << res + i + j << " at " << x+i << "," << y+j << endl;
                }
//                cerr << "[" << x+i << "][" << y+j << "] <= " << res+i+j << endl;
                rmat[x+i][y+j] = *spec;
            }
        }

//        cerr << " (done)" << endl;
        return;
    }
//    cerr << endl;

    if (*spl == 0) {

        unpackResultMatrix(rmat,
                           x, y,
                           w, h/2,
                           spl + 1,
                           spec,
                           sz/2,
                           res);

        unpackResultMatrix(rmat,
                           x, y + h/2,
                           w, h/2,
                           spl + sz/2,
                           spec + sz/2,
                           sz/2,
                           res);

    } else if (*spl == 1) {

        unpackResultMatrix(rmat,
                           x, y,
                           w/2, h,
                           spl + 1,
                           spec,
                           sz/2,
                           res/2);

        unpackResultMatrix(rmat,
                           x + w/2, y,
                           w/2, h,
                           spl + sz/2,
                           spec + sz/2,
                           sz/2,
                           res/2);

    } else {
        cerr << "ERROR: *spl = " << *spl << endl;
    }
}

//spl[Y-1]
//Specs[R0][x0:x0+x-1][Y0:Y0+Y-1]
//Specs[R0+1][2x0:2x0+2x-1][Y0/2:Y0/2+Y/2-1]
//...
//Specs[R0+?][Nx0:Nx0+Nx-1][Y0/N:Y0/N+Y/N-1]
//N=WID/wid

/**
 * DoCutSpectrogramBlock2 finds the optimal "cutting" and returns it in spl.
 */
double
AdaptiveSpectrogram::DoCutSpectrogramBlock2(int* spl, double*** Specs, int Y, int R0,
                                      int x0, int Y0, int N, double& ene)
{
    double ent = 0;

    if (Y > N) {

        spl[0] = 0;
        double ene1, ene2;

        ent += DoCutSpectrogramBlock2
            (&spl[1], Specs, Y/2, R0, x0, Y0, N, ene1);

        ent += DoCutSpectrogramBlock2
            (&spl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N, ene2);

        ene = ene1+ene2;

    } else if (N == 1) {

        double tmp = Specs[R0][x0][Y0];
        ene = tmp;
        ent = xlogx(tmp);

    } else {
        // Y == N, the square case

        double enel, ener, enet, eneb, entl, entr, entt, entb;
        int* tmpspl = new int[Y];

        entl = DoCutSpectrogramBlock2
            (&spl[1], Specs, Y/2, R0+1, 2*x0, Y0/2, N/2, enel);

        entr = DoCutSpectrogramBlock2
            (&spl[Y/2], Specs, Y/2, R0+1, 2*x0+1, Y0/2, N/2, ener);

        entb = DoCutSpectrogramBlock2
            (&tmpspl[1], Specs, Y/2, R0, x0, Y0, N/2, eneb);

        entt = DoCutSpectrogramBlock2
            (&tmpspl[Y/2], Specs, Y/2, R0, x0, Y0+Y/2, N/2, enet);

        double
            ene0 = enet + eneb,
            ene1 = enel + ener,
            ent0 = entt + entb,
            ent1 = entl + entr;

        // normalize

        double eneres = 1 - (ene0+ene1)/2, norment0, norment1;
        double a0 = 1 / (ene0+eneres), a1 = 1 / (ene1+eneres);

        // quasi-global normalization

        norment0 = (ent0 - ene0 * log(ene0+eneres)) / (ene0+eneres);
        norment1 = (ent1 - ene1 * log(ene1+eneres)) / (ene1+eneres);

        // local normalization

        if (norment1 < norment0) {
            spl[0] = 0;
            ent = ent0, ene = ene0;
            memcpy(&spl[1], &tmpspl[1], sizeof(int)*(Y-2));
        } else {
            spl[0] = 1;
            ent = ent1, ene = ene1;
        }
    }
    return ent;
}

/**
 * DoMixSpectrogramBlock2 collects values from the multiple
 * spectrograms Specs into a linear array Spec.
 */
double
AdaptiveSpectrogram::DoMixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int Y,
                                      int R0, int x0, int Y0, bool normmix, int res,
                                      double* e)
{
    if (Y == 1) {

        Spec[0] = Specs[R0][x0][Y0]*e[0];

    } else {

        double le[32];

        if (normmix && Y < (1<<res)) {

            for (int i = 0, j = 1, k = Y;
                 i < res;
                 i++, j *= 2, k /= 2) {

                double lle = 0;

                for (int fr = 0; fr < j; fr++) {
                    for (int n = 0; n < k; n++) {
                        lle +=
                            Specs[i+R0][x0+fr][Y0+n] *
                            Specs[i+R0][x0+fr][Y0+n];
                    }
                }

                lle = sqrt(lle)*e[i];

                if (i == 0) {
                    le[0] = lle;
                } else if (lle > le[0]*2) {
                    le[i] = e[i]*le[0]*2/lle;
                } else {
                    le[i] = e[i];
                }
            }

            le[0] = e[0];

        } else {

            memcpy(le, e, sizeof(double)*res);
        }

        if (spl[0] == 0) {

            int newres;
            if (Y >= (1<<res)) newres = res;
            else newres = res-1;

            DoMixSpectrogramBlock2
                (&spl[1], Spec, Specs, Y/2, R0, x0, Y0,
                 normmix, newres, le);

            DoMixSpectrogramBlock2
                (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0, x0, Y0+Y/2,
                 normmix, newres, le);

        } else {

            DoMixSpectrogramBlock2
                (&spl[1], Spec, Specs, Y/2, R0+1, x0*2, Y0/2,
                 normmix, res-1, &le[1]);

            DoMixSpectrogramBlock2
                (&spl[Y/2], &Spec[Y/2], Specs, Y/2, R0+1, x0*2+1, Y0/2,
                 normmix, res-1, &le[1]);
        }
    }

    return 0;
}

/**
 * MixSpectrogramBlock2 calls the two Do...() to do the real work.
 *
 * At this point:
 * spl is... what?  the returned "cutting", organised how?
 * Spec is... what?  the returned spectrogram, organised how?
 * Specs is an array of input spectrograms
 * WID is the maximum window size
 * wid is the minimum window size
 * normmix is... what?
 */
double
AdaptiveSpectrogram::MixSpectrogramBlock2(int* spl, double* Spec, double*** Specs, int
                                    WID, int wid, bool normmix)
{
    double ene[32];

    // find the total energy and normalize

    for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {

        double lene = 0;

        for (int fr = 0; fr < Fr; fr++) {
            for (int k = 0; k < Wid; k++) {
                lene += Specs[i][fr][k]*Specs[i][fr][k];
            }
        }

        ene[i] = lene;

        if (lene != 0) {
            double ilene = 1.0/lene;
            for (int fr = 0; fr < Fr; fr++) {
                for (int k = 0; k < Wid; k++) {
                    Specs[i][fr][k] = Specs[i][fr][k]*Specs[i][fr][k]*ilene;
                }
            }
        }
    }


    double result = DoCutSpectrogramBlock2
        (spl, Specs, WID, 0, 0, 0, WID/wid, ene[31]);

    // de-normalize

    for (int i = 0, Fr = 1, Wid = WID; Wid >= wid; i++, Fr *= 2, Wid /= 2) {
        double lene = ene[i];
        if (lene != 0) {
            for (int fr = 0; fr < Fr; fr++) {
                for (int k = 0; k < Wid; k++) {
                    Specs[i][fr][k] = sqrt(Specs[i][fr][k]*lene);
                }
            }
        }
    }

    double e[32];
    for (int i = 0; i < 32; i++) e[i] = 1;

    DoMixSpectrogramBlock2
        (spl, Spec, Specs, WID, 0, 0, 0, normmix, log2(WID/wid)+1, e);

    return result;
}

/**
 * MixSpectrogram2 does the work for Fr frames (the largest frame),
 * which basically calls MixSpectrogramBlock2 Fr times.
 *
 * the 3-D array Specs is the multiple spectrograms calculated with
 * window sizes between wid and WID, Specs[0] is the 0th spectrogram,
 * etc.
 *
 * spl and Spec for all frames are returned by MixSpectrogram2, each
 * as a 2-D array.
 */
double
AdaptiveSpectrogram::MixSpectrogram2(int** spl, double** Spec, double*** Specs, int Fr,
				     int WID, int wid, bool norm, bool normmix)
{
    // totally Fr frames of WID samples
    // each frame is divided into wid VERTICAL parts

    int Res = log2(WID/wid)+1;
    double*** lSpecs = new double**[Res];

    for (int i = 0; i < Fr; i++) {

        for (int j = 0, nfr = 1; j < Res; j++, nfr *= 2) {
            lSpecs[j] = &Specs[j][i*nfr];
        }

        MixSpectrogramBlock2(spl[i], Spec[i], lSpecs, WID, wid, norm);
    }

    delete[] lSpecs;
    return 0;
}
author	Chris Cannam <c.cannam@qmul.ac.uk>
date	Thu, 02 Apr 2009 11:18:22 +0000
parents	385bec9df059
children	8700a93424f4