view transform/FeatureExtractionPluginTransform.cpp @ 76:af2725b5d6fe

* Implement harmonic cursor in spectrogram * Implement layer export. This doesn't quite do the right thing for the SV XML layer export yet -- it doesn't include layer display information, so when imported, it only creates an invisible model. Could also do with fixing CSV file import so as to work correctly for note and text layers.
author Chris Cannam
date Mon, 10 Apr 2006 17:22:59 +0000
parents 47fd14e29813
children 8cd01027502f
line wrap: on
line source

/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2006 Chris Cannam.
    
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include "FeatureExtractionPluginTransform.h"

#include "plugin/FeatureExtractionPluginFactory.h"
#include "plugin/PluginXml.h"
#include "vamp-sdk/Plugin.h"

#include "base/Model.h"
#include "base/Window.h"
#include "model/SparseOneDimensionalModel.h"
#include "model/SparseTimeValueModel.h"
#include "model/DenseThreeDimensionalModel.h"
#include "model/DenseTimeValueModel.h"

#include <fftw3.h>

#include <iostream>

FeatureExtractionPluginTransform::FeatureExtractionPluginTransform(Model *inputModel,
								   QString pluginId,
                                                                   int channel,
                                                                   QString configurationXml,
								   QString outputName) :
    Transform(inputModel),
    m_plugin(0),
    m_channel(channel),
    m_stepSize(0),
    m_blockSize(0),
    m_descriptor(0),
    m_outputFeatureNo(0)
{
    std::cerr << "FeatureExtractionPluginTransform::FeatureExtractionPluginTransform: plugin " << pluginId.toStdString() << ", outputName " << outputName.toStdString() << std::endl;

    FeatureExtractionPluginFactory *factory =
	FeatureExtractionPluginFactory::instanceFor(pluginId);

    if (!factory) {
	std::cerr << "FeatureExtractionPluginTransform: No factory available for plugin id \""
		  << pluginId.toStdString() << "\"" << std::endl;
	return;
    }

    m_plugin = factory->instantiatePlugin(pluginId, m_input->getSampleRate());

    if (!m_plugin) {
	std::cerr << "FeatureExtractionPluginTransform: Failed to instantiate plugin \""
		  << pluginId.toStdString() << "\"" << std::endl;
	return;
    }

    if (configurationXml != "") {
        PluginXml(m_plugin).setParametersFromXml(configurationXml);
    }

    m_blockSize = m_plugin->getPreferredBlockSize();
    m_stepSize = m_plugin->getPreferredStepSize();

    if (m_blockSize == 0) m_blockSize = 1024; //!!! todo: ask user
    if (m_stepSize == 0) m_stepSize = m_blockSize; //!!! likewise

    //!!! cope with plugins that request non-power-of-2 block sizes in
    // the frequency domain!

    Vamp::Plugin::OutputList outputs = m_plugin->getOutputDescriptors();

    if (outputs.empty()) {
	std::cerr << "FeatureExtractionPluginTransform: Plugin \""
		  << pluginId.toStdString() << "\" has no outputs" << std::endl;
	return;
    }
    
    for (size_t i = 0; i < outputs.size(); ++i) {
	if (outputName == "" || outputs[i].name == outputName.toStdString()) {
	    m_outputFeatureNo = i;
	    m_descriptor = new Vamp::Plugin::OutputDescriptor
		(outputs[i]);
	    break;
	}
    }

    if (!m_descriptor) {
	std::cerr << "FeatureExtractionPluginTransform: Plugin \""
		  << pluginId.toStdString() << "\" has no output named \""
		  << outputName.toStdString() << "\"" << std::endl;
	return;
    }

    std::cerr << "FeatureExtractionPluginTransform: output sample type "
	      << m_descriptor->sampleType << std::endl;

    int binCount = 1;
    float minValue = 0.0, maxValue = 0.0;
    
    if (m_descriptor->hasFixedBinCount) {
	binCount = m_descriptor->binCount;
    }

    if (binCount > 0 && m_descriptor->hasKnownExtents) {
	minValue = m_descriptor->minValue;
	maxValue = m_descriptor->maxValue;
    }

    size_t modelRate = m_input->getSampleRate();
    size_t modelResolution = 1;
    
    switch (m_descriptor->sampleType) {

    case Vamp::Plugin::OutputDescriptor::VariableSampleRate:
	if (m_descriptor->sampleRate != 0.0) {
	    modelResolution = size_t(modelRate / m_descriptor->sampleRate + 0.001);
	}
	break;

    case Vamp::Plugin::OutputDescriptor::OneSamplePerStep:
	modelResolution = m_stepSize;
	break;

    case Vamp::Plugin::OutputDescriptor::FixedSampleRate:
	modelRate = size_t(m_descriptor->sampleRate + 0.001);
	break;
    }

    if (binCount == 0) {

	m_output = new SparseOneDimensionalModel(modelRate, modelResolution,
						 false);

    } else if (binCount == 1 ||

	       // We don't have a sparse 3D model
	       m_descriptor->sampleType ==
	       Vamp::Plugin::OutputDescriptor::VariableSampleRate) {
	
        SparseTimeValueModel *model = new SparseTimeValueModel
            (modelRate, modelResolution, minValue, maxValue, false);
        model->setScaleUnits(outputs[m_outputFeatureNo].unit.c_str());

        m_output = model;

    } else {
	
	m_output = new DenseThreeDimensionalModel(modelRate, modelResolution,
						  binCount, false);

	if (!m_descriptor->binNames.empty()) {
	    std::vector<QString> names;
	    for (size_t i = 0; i < m_descriptor->binNames.size(); ++i) {
		names.push_back(m_descriptor->binNames[i].c_str());
	    }
	    (dynamic_cast<DenseThreeDimensionalModel *>(m_output))
		->setBinNames(names);
	}
    }
}

FeatureExtractionPluginTransform::~FeatureExtractionPluginTransform()
{
    delete m_plugin;
    delete m_descriptor;
}

DenseTimeValueModel *
FeatureExtractionPluginTransform::getInput()
{
    DenseTimeValueModel *dtvm =
	dynamic_cast<DenseTimeValueModel *>(getInputModel());
    if (!dtvm) {
	std::cerr << "FeatureExtractionPluginTransform::getInput: WARNING: Input model is not conformable to DenseTimeValueModel" << std::endl;
    }
    return dtvm;
}

void
FeatureExtractionPluginTransform::run()
{
    DenseTimeValueModel *input = getInput();
    if (!input) return;

    if (!m_output) return;

    size_t channelCount = input->getChannelCount();
    if (m_plugin->getMaxChannelCount() < channelCount) {
	channelCount = 1;
    }
    if (m_plugin->getMinChannelCount() > channelCount) {
	std::cerr << "FeatureExtractionPluginTransform::run: "
		  << "Can't provide enough channels to plugin (plugin min "
		  << m_plugin->getMinChannelCount() << ", max "
		  << m_plugin->getMaxChannelCount() << ", input model has "
		  << input->getChannelCount() << ")" << std::endl;
	return;
    }

    size_t sampleRate = m_input->getSampleRate();

    if (!m_plugin->initialise(channelCount, m_stepSize, m_blockSize)) {
        std::cerr << "FeatureExtractionPluginTransform::run: Plugin "
                  << m_plugin->getName() << " failed to initialise!" << std::endl;
        return;
    }

    float **buffers = new float*[channelCount];
    for (size_t ch = 0; ch < channelCount; ++ch) {
	buffers[ch] = new float[m_blockSize];
    }

    double *fftInput = 0;
    fftw_complex *fftOutput = 0;
    fftw_plan fftPlan = 0;
    Window<double> windower(HanningWindow, m_blockSize);

    if (m_plugin->getInputDomain() == Vamp::Plugin::FrequencyDomain) {

        fftInput = (double *)fftw_malloc(m_blockSize * sizeof(double));
        fftOutput = (fftw_complex *)fftw_malloc(m_blockSize * sizeof(fftw_complex));
        fftPlan = fftw_plan_dft_r2c_1d(m_blockSize, fftInput, fftOutput,
                                       FFTW_ESTIMATE);
        if (!fftPlan) {
            std::cerr << "ERROR: FeatureExtractionPluginTransform::run(): fftw_plan failed! Results will be garbage" << std::endl;
        }
    }

    long startFrame = m_input->getStartFrame();
    long   endFrame = m_input->getEndFrame();
    long blockFrame = startFrame;

    long prevCompletion = 0;

    while (1) {

        if (fftPlan) {
            if (blockFrame - int(m_blockSize)/2 > endFrame) break;
        } else {
            if (blockFrame >= endFrame) break;
        }

//	std::cerr << "FeatureExtractionPluginTransform::run: blockFrame "
//		  << blockFrame << std::endl;

	long completion =
	    (((blockFrame - startFrame) / m_stepSize) * 99) /
	    (   (endFrame - startFrame) / m_stepSize);

	// channelCount is either m_input->channelCount or 1

        for (size_t ch = 0; ch < channelCount; ++ch) {
            if (fftPlan) {
                getFrames(ch, channelCount, 
                          blockFrame - m_blockSize/2, m_blockSize, buffers[ch]);
            } else {
                getFrames(ch, channelCount, 
                           blockFrame, m_blockSize, buffers[ch]);
            }                
        }
        
        if (fftPlan) {
            for (size_t ch = 0; ch < channelCount; ++ch) {
                for (size_t i = 0; i < m_blockSize; ++i) {
                    fftInput[i] = buffers[ch][i];
                }
                windower.cut(fftInput);
                for (size_t i = 0; i < m_blockSize/2; ++i) {
                    double temp = fftInput[i];
                    fftInput[i] = fftInput[i + m_blockSize/2];
                    fftInput[i + m_blockSize/2] = temp;
                }
                fftw_execute(fftPlan);
                for (size_t i = 0; i < m_blockSize/2; ++i) {
                    buffers[ch][i*2] = fftOutput[i][0];
                    buffers[ch][i*2 + 1] = fftOutput[i][1];
                }
            }
        }

	Vamp::Plugin::FeatureSet features = m_plugin->process
	    (buffers, Vamp::RealTime::frame2RealTime(blockFrame, sampleRate));

	for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
	    Vamp::Plugin::Feature feature =
		features[m_outputFeatureNo][fi];
	    addFeature(blockFrame, feature);
	}

	if (blockFrame == startFrame || completion > prevCompletion) {
	    setCompletion(completion);
	    prevCompletion = completion;
	}

	blockFrame += m_stepSize;
    }

    if (fftPlan) {
        fftw_destroy_plan(fftPlan);
        fftw_free(fftInput);
        fftw_free(fftOutput);
    }

    Vamp::Plugin::FeatureSet features = m_plugin->getRemainingFeatures();

    for (size_t fi = 0; fi < features[m_outputFeatureNo].size(); ++fi) {
	Vamp::Plugin::Feature feature =
	    features[m_outputFeatureNo][fi];
	addFeature(blockFrame, feature);
    }

    setCompletion(100);
}

void
FeatureExtractionPluginTransform::getFrames(int channel, int channelCount,
                                            long startFrame, long size,
                                            float *buffer)
{
    long offset = 0;

    if (startFrame < 0) {
        for (int i = 0; i < size && startFrame + i < 0; ++i) {
            buffer[i] = 0.0f;
        }
        offset = -startFrame;
        size -= offset;
        if (size <= 0) return;
        startFrame = 0;
    }

    long got = getInput()->getValues
        ((channelCount == 1 ? m_channel : channel),
         startFrame, startFrame + size, buffer + offset);

    while (got < size) {
        buffer[offset + got] = 0.0;
        ++got;
    }

    if (m_channel == -1 && channelCount == 1 &&
        getInput()->getChannelCount() > 1) {
        // use mean instead of sum, as plugin input
        int cc = getInput()->getChannelCount();
        for (long i = 0; i < size; ++i) {
            buffer[i] /= cc;
        }
    }
}

void
FeatureExtractionPluginTransform::addFeature(size_t blockFrame,
					     const Vamp::Plugin::Feature &feature)
{
    size_t inputRate = m_input->getSampleRate();

//    std::cerr << "FeatureExtractionPluginTransform::addFeature("
//	      << blockFrame << ")" << std::endl;

    int binCount = 1;
    if (m_descriptor->hasFixedBinCount) {
	binCount = m_descriptor->binCount;
    }

    size_t frame = blockFrame;

    if (m_descriptor->sampleType ==
	Vamp::Plugin::OutputDescriptor::VariableSampleRate) {

	if (!feature.hasTimestamp) {
	    std::cerr
		<< "WARNING: FeatureExtractionPluginTransform::addFeature: "
		<< "Feature has variable sample rate but no timestamp!"
		<< std::endl;
	    return;
	} else {
	    frame = Vamp::RealTime::realTime2Frame(feature.timestamp, inputRate);
	}

    } else if (m_descriptor->sampleType ==
	       Vamp::Plugin::OutputDescriptor::FixedSampleRate) {

	if (feature.hasTimestamp) {
	    //!!! warning: sampleRate may be non-integral
	    frame = Vamp::RealTime::realTime2Frame(feature.timestamp,
                                                   m_descriptor->sampleRate);
	} else {
	    frame = m_output->getEndFrame() + 1;
	}
    }
	
    if (binCount == 0) {

	SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
	if (!model) return;
	model->addPoint(SparseOneDimensionalModel::Point(frame, feature.label.c_str()));
	
    } else if (binCount == 1 ||
	       m_descriptor->sampleType == 
	       Vamp::Plugin::OutputDescriptor::VariableSampleRate) {

	float value = 0.0;
	if (feature.values.size() > 0) value = feature.values[0];

	SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
	if (!model) return;
	model->addPoint(SparseTimeValueModel::Point(frame, value, feature.label.c_str()));
	
    } else {
	
	DenseThreeDimensionalModel::BinValueSet values = feature.values;
	
	DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>();
	if (!model) return;

	model->setBinValues(frame, values);
    }
}

void
FeatureExtractionPluginTransform::setCompletion(int completion)
{
    int binCount = 1;
    if (m_descriptor->hasFixedBinCount) {
	binCount = m_descriptor->binCount;
    }

    if (binCount == 0) {

	SparseOneDimensionalModel *model = getOutput<SparseOneDimensionalModel>();
	if (!model) return;
	model->setCompletion(completion);

    } else if (binCount == 1 ||
	       m_descriptor->sampleType ==
	       Vamp::Plugin::OutputDescriptor::VariableSampleRate) {

	SparseTimeValueModel *model = getOutput<SparseTimeValueModel>();
	if (!model) return;
	model->setCompletion(completion);

    } else {

	DenseThreeDimensionalModel *model = getOutput<DenseThreeDimensionalModel>();
	if (!model) return;
	model->setCompletion(completion);
    }
}