view rdf/RDFFeatureWriter.cpp @ 587:94054c97c76a

* Add forgotten angle brackets to signal type URI
author Chris Cannam
date Wed, 13 May 2009 12:59:07 +0000
parents 4ab8e8f7d7ed
children 063da8955f65
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

/*
    Sonic Annotator
    A utility for batch feature extraction from audio files.
    Mark Levy, Chris Sutton and Chris Cannam, Queen Mary, University of London.
    Copyright 2007-2008 QMUL.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#include <fstream>

#include "vamp-hostsdk/PluginHostAdapter.h"
#include "vamp-hostsdk/PluginLoader.h"

#include "RDFFeatureWriter.h"
#include "RDFTransformFactory.h"

#include <QTextStream>
#include <QUrl>
#include <QFileInfo>
#include <QRegExp>

using namespace std;
using Vamp::Plugin;
using Vamp::PluginBase;

RDFFeatureWriter::RDFFeatureWriter() :
    FileFeatureWriter(SupportOneFilePerTrackTransform |
                      SupportOneFilePerTrack |
                      SupportOneFileTotal,
                      "n3"),
    m_plain(false),
    m_count(0)
{
}

RDFFeatureWriter::~RDFFeatureWriter()
{
}

RDFFeatureWriter::ParameterList
RDFFeatureWriter::getSupportedParameters() const
{
    ParameterList pl = FileFeatureWriter::getSupportedParameters();
    Parameter p;

    p.name = "plain";
    p.description = "Use \"plain\" RDF even if transform metadata is available.";
    p.hasArg = false;
    pl.push_back(p);

    p.name = "audiofile-uri";
    p.description = "Link the output RDF to the given audio file URI instead of its actual location.";
    p.hasArg = true;
    pl.push_back(p);
    
    return pl;
}

void
RDFFeatureWriter::setParameters(map<string, string> &params)
{
    FileFeatureWriter::setParameters(params);

    for (map<string, string>::iterator i = params.begin();
         i != params.end(); ++i) {
        if (i->first == "plain") {
            m_plain = true;
        }
        if (i->first == "audiofile-uri") {
            m_suri = i->second.c_str();
        }
    }
}

void
RDFFeatureWriter::setTrackMetadata(QString trackId,
                                   TrackMetadata metadata)
{
    m_metadata[trackId] = metadata;
}

void
RDFFeatureWriter::setFixedEventTypeURI(QString uri)
{
    m_fixedEventTypeURI = uri;
}

void
RDFFeatureWriter::write(QString trackId,
                        const Transform &transform,
                        const Plugin::OutputDescriptor& output,
                        const Plugin::FeatureList& features,
                        std::string summaryType)
{
    QString pluginId = transform.getPluginIdentifier();

    if (m_rdfDescriptions.find(pluginId) == m_rdfDescriptions.end()) {

        m_rdfDescriptions[pluginId] = PluginRDFDescription(pluginId);

        if (m_rdfDescriptions[pluginId].haveDescription()) {
            cerr << "NOTE: Have RDF description for plugin ID \""
                 << pluginId.toStdString() << "\"" << endl;
        } else {
            cerr << "NOTE: Do not have RDF description for plugin ID \""
                 << pluginId.toStdString() << "\"" << endl;
        }
    }

    // Need to select appropriate output file for our track/transform
    // combination

    QTextStream *stream = getOutputStream(trackId, transform.getIdentifier());
    if (!stream) {
        std::cerr << "RDFFeatureWriter::write: ERROR: No output stream for track id \""
                  << trackId.toStdString() << "\" and transform \""
                  << transform.getIdentifier().toStdString() << "\"" << std::endl;
        exit(1);
    }

    if (m_startedStreamTransforms.find(stream) ==
        m_startedStreamTransforms.end()) {
        cerr << "This stream is new, writing prefixes" << endl;
        writePrefixes(stream);
        if (m_singleFileName == "" && !m_stdout) {
            writeSignalDescription(stream, trackId);
        }
    }

    if (m_startedStreamTransforms[stream].find(transform) ==
        m_startedStreamTransforms[stream].end()) {
        m_startedStreamTransforms[stream].insert(transform);
        writeLocalFeatureTypes
            (stream, transform, output, m_rdfDescriptions[pluginId]);
    }

    if (m_singleFileName != "" || m_stdout) {
        if (m_startedTrackIds.find(trackId) == m_startedTrackIds.end()) {
            writeSignalDescription(stream, trackId);
            m_startedTrackIds.insert(trackId);
        }
    }

    QString timelineURI = m_trackTimelineURIs[trackId];
    
    if (timelineURI == "") {
        cerr << "RDFFeatureWriter: INTERNAL ERROR: writing features without having established a timeline URI!" << endl;
        exit(1);
    }

    if (summaryType != "") {

        writeSparseRDF(stream, transform, output, features,
                       m_rdfDescriptions[pluginId], timelineURI);

    } else if (m_rdfDescriptions[pluginId].haveDescription() &&
               m_rdfDescriptions[pluginId].getOutputDisposition
               (output.identifier.c_str()) == 
               PluginRDFDescription::OutputDense) {

        QString signalURI = m_trackSignalURIs[trackId];

        if (signalURI == "") {
            cerr << "RDFFeatureWriter: INTERNAL ERROR: writing dense features without having established a signal URI!" << endl;
            exit(1);
        }

        writeDenseRDF(stream, transform, output, features,
                      m_rdfDescriptions[pluginId], signalURI, timelineURI);

    } else if (!m_plain &&
               m_rdfDescriptions[pluginId].haveDescription() &&
               m_rdfDescriptions[pluginId].getOutputDisposition
               (output.identifier.c_str()) ==
               PluginRDFDescription::OutputTrackLevel &&
               m_rdfDescriptions[pluginId].getOutputFeatureAttributeURI
               (output.identifier.c_str()) != "") {

        QString signalURI = m_trackSignalURIs[trackId];

        if (signalURI == "") {
            cerr << "RDFFeatureWriter: INTERNAL ERROR: writing track-level features without having established a signal URI!" << endl;
            exit(1);
        }

        writeTrackLevelRDF(stream, transform, output, features,
                           m_rdfDescriptions[pluginId], signalURI);

    } else {

        writeSparseRDF(stream, transform, output, features,
                       m_rdfDescriptions[pluginId], timelineURI);
    }
}

void
RDFFeatureWriter::writePrefixes(QTextStream *sptr)
{
    QTextStream &stream = *sptr;

    stream << "@prefix dc: <http://purl.org/dc/elements/1.1/> .\n"
           << "@prefix mo: <http://purl.org/ontology/mo/> .\n"
           << "@prefix af: <http://purl.org/ontology/af/> .\n"
           << "@prefix foaf: <http://xmlns.com/foaf/0.1/> . \n"
           << "@prefix event: <http://purl.org/NET/c4dm/event.owl#> .\n"
           << "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n"
           << "@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n"
           << "@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n"
           << "@prefix tl: <http://purl.org/NET/c4dm/timeline.owl#> .\n"
           << "@prefix vamp: <http://purl.org/ontology/vamp/> .\n"
           << "@prefix : <#> .\n\n";
}

void
RDFFeatureWriter::writeSignalDescription(QTextStream *sptr,
                                         QString trackId)
{
    std::cerr << "RDFFeatureWriter::writeSignalDescription" << std::endl;

    QTextStream &stream = *sptr;

    /*
     * Describe signal we're analysing (AudioFile, Signal, TimeLine, etc.)
     */
    
    QUrl url(trackId);
    QString scheme = url.scheme().toLower();
    bool local = (scheme == "" || scheme == "file" || scheme.length() == 1);

    if (local) {
        if (scheme == "") {
            url.setScheme("file");
            url.setPath(QFileInfo(url.path()).absoluteFilePath());
        } else if (scheme.length() == 1) { // DOS drive letter!
            url.setScheme("file");
            url.setPath(scheme + ":" + url.path());
        }
    }

    //!!! FIX: If we are appending, we need to start counting after
    //all of the existing counts that are already in the file!

    uint64_t signalCount = m_count++;

    if (m_trackSignalURIs.find(trackId) == m_trackSignalURIs.end()) {
        m_trackSignalURIs[trackId] = QString(":signal_%1").arg(signalCount);
    }
    QString signalURI = m_trackSignalURIs[trackId];
   
    if (m_trackTrackURIs.find(trackId) == m_trackTrackURIs.end()) {
        m_trackTrackURIs[trackId] = QString(":track_%1").arg(signalCount);
    }
    QString trackURI = m_trackTrackURIs[trackId];
    
    if (m_trackTimelineURIs.find(trackId) == m_trackTimelineURIs.end()) {
        m_trackTimelineURIs[trackId] = QString(":signal_timeline_%1").arg(signalCount);
    }
    QString timelineURI = m_trackTimelineURIs[trackId];

    QString afURI = url.toEncoded().data();
    if (m_suri != NULL) afURI = m_suri;

    if (m_metadata.find(trackId) != m_metadata.end()) {
        TrackMetadata tm = m_metadata[trackId];
        if (tm.title != "" || tm.maker != "") {
            // We only write a Track at all if we have some
            // title/artist metadata to put in it.  Otherwise we can't
            // be sure that what we have is a Track, in the
            // publication sense -- it may just be a fragment, a test
            // file, whatever.  Since we'd have no metadata to
            // associate with our Track, the only effect of including
            // a Track would be to assert that this was one, which is
            // the one thing we wouldn't know...
            stream << trackURI << " a mo:Track ";
            if (tm.title != "") {
                stream << ";\n    dc:title \"\"\"" << tm.title << "\"\"\" ";
            }
            if (tm.maker != "") {
                stream << ";\n    foaf:maker [ a mo:MusicArtist; foaf:name \"\"\"" << tm.maker << "\"\"\" ] ";
            }
            if (trackId != "") {
                stream << ";\n    mo:available_as <" << afURI << "> ";
            }
            stream << ".\n\n";
        }
    }

    if (trackId != "") {
        stream << "<" << afURI << "> a mo:AudioFile ;\n";
        stream << "    mo:encodes " << signalURI << ".\n\n";
    }

    stream << signalURI << " a mo:Signal ;\n";

    stream << "    mo:time [\n"
           << "        a tl:Interval ;\n"
           << "        tl:onTimeLine "
           << timelineURI << "\n    ] .\n\n";

    stream << timelineURI << " a tl:Timeline .\n";
} 

void
RDFFeatureWriter::writeLocalFeatureTypes(QTextStream *sptr,
                                         const Transform &transform,
                                         const Plugin::OutputDescriptor &od,
                                         PluginRDFDescription &desc)
{
    QString outputId = od.identifier.c_str();
    QTextStream &stream = *sptr;

    // There is no "needFeatureType" for track-level outputs, because
    // we can't meaningfully write a feature at all if we don't know
    // what property to use for it.  If the output is track level but
    // there is no feature type given, we have to revert to events.

    bool needEventType = false;
    bool needSignalType = false;

    //!!! bin names, extents and so on can be written out using e.g. vamp:bin_names ( "a" "b" "c" ) 

    if (desc.getOutputDisposition(outputId) == 
        PluginRDFDescription::OutputDense) {

        // no feature events, so may need signal type but won't need
        // event type

        if (m_plain) {

            needSignalType = true;

        } else if (desc.getOutputSignalTypeURI(outputId) == "") {
            
            needSignalType = true;
        }

    } else if (desc.getOutputDisposition(outputId) ==
               PluginRDFDescription::OutputTrackLevel) {

        // see note above -- need to generate an event type if no
        // feature type given, or if in plain mode

        if (m_plain) {
        
            needEventType = true;

        } else if (desc.getOutputFeatureAttributeURI(outputId) == "") {
    
            if (desc.getOutputEventTypeURI(outputId) == "") {

                needEventType = true;
            }
        }

    } else {

        // may need event type but won't need signal type

        if (m_plain) {
        
            needEventType = true;
    
        } else if (desc.getOutputEventTypeURI(outputId) == "") {

            needEventType = true;
        }
    }

    QString transformUri;
    if (m_transformURIs.find(transform) != m_transformURIs.end()) {
        transformUri = m_transformURIs[transform];
    } else {
        transformUri = QString(":transform_%1_%2").arg(m_count++).arg(outputId);
        m_transformURIs[transform] = transformUri;
    }

    if (transform.getIdentifier() != "") {
        stream << endl
               << RDFTransformFactory::writeTransformToRDF(transform, transformUri)
               << endl;
    }

    if (needEventType && m_fixedEventTypeURI == "") {

        QString uri;
        if (m_syntheticEventTypeURIs.find(transform) !=
            m_syntheticEventTypeURIs.end()) {
            uri = m_syntheticEventTypeURIs[transform];
        } else {
            uri = QString(":event_type_%1").arg(m_count++);
            m_syntheticEventTypeURIs[transform] = uri;
        }

        stream << uri
               << " rdfs:subClassOf event:Event ;" << endl
               << "    dc:title \"" << od.name.c_str() << "\" ;" << endl
               << "    dc:format \"" << od.unit.c_str() << "\" ;" << endl
               << "    dc:description \"" << od.description.c_str() << "\" ."
               << endl << endl;
    }

    if (needSignalType) {

        QString uri;
        if (m_syntheticSignalTypeURIs.find(transform) !=
            m_syntheticSignalTypeURIs.end()) {
            uri = m_syntheticSignalTypeURIs[transform];
        } else {
            uri = QString(":signal_type_%1").arg(m_count++);
            m_syntheticSignalTypeURIs[transform] = uri;
        }

        stream << uri
               << " rdfs:subClassOf af:Signal ;" << endl
               << "    dc:title \"" << od.name.c_str() << "\" ;" << endl
               << "    dc:format \"" << od.unit.c_str() << "\" ;" << endl
               << "    dc:description \"" << od.description.c_str() << "\" ."
               << endl << endl;
    }
}

void
RDFFeatureWriter::writeSparseRDF(QTextStream *sptr,
                                 const Transform &transform,
                                 const Plugin::OutputDescriptor& od,
                                 const Plugin::FeatureList& featureList,
                                 PluginRDFDescription &desc,
                                 QString timelineURI)
{
//    std::cerr << "RDFFeatureWriter::writeSparseRDF: have " << featureList.size() << " features" << std::endl;

    if (featureList.empty()) return;
    QTextStream &stream = *sptr;
        
    bool plain = (m_plain || !desc.haveDescription());

    QString outputId = od.identifier.c_str();

    // iterate through FeatureLists
        
    for (int i = 0; i < featureList.size(); ++i) {

        const Plugin::Feature &feature = featureList[i];
        uint64_t featureNumber = m_count++;

        stream << ":event_" << featureNumber << " a ";

        if (m_fixedEventTypeURI != "") {
            stream << m_fixedEventTypeURI << " ;\n";
        } else {
            QString eventTypeURI = desc.getOutputEventTypeURI(outputId);
            if (plain || eventTypeURI == "") {
                if (m_syntheticEventTypeURIs.find(transform) != 
                    m_syntheticEventTypeURIs.end()) {
                    stream << m_syntheticEventTypeURIs[transform] << " ;\n";
                } else {
                    stream << ":event_type_" << outputId << " ;\n";
                }
            } else {
                stream << "<" << eventTypeURI << "> ;\n";
            }
        }

        QString timestamp = feature.timestamp.toString().c_str();
        timestamp.replace(QRegExp("^ +"), "");

        if (feature.hasDuration && feature.duration > Vamp::RealTime::zeroTime) {

            QString duration = feature.duration.toString().c_str();
            duration.replace(QRegExp("^ +"), "");

            stream << "    event:time [ \n"
                   << "        a tl:Interval ;\n"
                   << "        tl:onTimeLine " << timelineURI << " ;\n"
                   << "        tl:beginsAt \"PT" << timestamp
                   << "S\"^^xsd:duration ;\n"
                   << "        tl:duration \"PT" << duration
                   << "S\"^^xsd:duration ;\n"
                   << "    ] ";

        } else {

            stream << "    event:time [ \n"
                   << "        a tl:Instant ;\n" //location of the event in time
                   << "        tl:onTimeLine " << timelineURI << " ;\n"
                   << "        tl:at \"PT" << timestamp
                   << "S\"^^xsd:duration ;\n    ] ";
        }

        if (transform.getIdentifier() != "") {
            stream << ";\n";
            stream << "    vamp:computed_by " << m_transformURIs[transform] << " ";
        }

        if (feature.label.length() > 0) {
            stream << ";\n";
            stream << "    rdfs:label \"\"\"" << feature.label.c_str() << "\"\"\" ";
        }

        if (!feature.values.empty()) {
            stream << ";\n";
            //!!! named bins?
            stream << "    af:feature \"" << feature.values[0];
            for (int j = 1; j < feature.values.size(); ++j) {
                stream << " " << feature.values[j];
            }
            stream << "\" ";
        }

        stream << ".\n";
    }
}

void
RDFFeatureWriter::writeTrackLevelRDF(QTextStream *sptr,
                                     const Transform &transform,
                                     const Plugin::OutputDescriptor& od,
                                     const Plugin::FeatureList& featureList,
                                     PluginRDFDescription &desc,
                                     QString signalURI)
{
    if (featureList.empty()) return;
    QTextStream &stream = *sptr;
        
    bool plain = (m_plain || !desc.haveDescription());

    QString outputId = od.identifier.c_str();
    QString featureUri = desc.getOutputFeatureAttributeURI(outputId);

    if (featureUri == "") {
        cerr << "RDFFeatureWriter::writeTrackLevelRDF: ERROR: No feature URI available -- this function should not have been called!" << endl;
        return;
    }

    for (int i = 0; i < featureList.size(); ++i) {

        const Plugin::Feature &feature = featureList[i];

        if (feature.values.empty()) {

            if (feature.label == "") continue;

            stream << signalURI << " " << featureUri << " \"\"\""
                   << feature.label.c_str() << "\"\"\" .\n";

        } else {

            stream << signalURI << " " << featureUri << " \""
                   << feature.values[0] << "\"^^xsd:float .\n";
        }
    }
}

void
RDFFeatureWriter::writeDenseRDF(QTextStream *sptr,
                                const Transform &transform,
                                const Plugin::OutputDescriptor& od,
                                const Plugin::FeatureList& featureList,
                                PluginRDFDescription &desc,
                                QString signalURI, 
                                QString timelineURI)
{
    if (featureList.empty()) return;

    StringTransformPair sp(signalURI, transform);

    if (m_openDenseFeatures.find(sp) == m_openDenseFeatures.end()) {

        StreamBuffer b(sptr, "");
        m_openDenseFeatures[sp] = b;
        
        QString &str(m_openDenseFeatures[sp].second);
        QTextStream stream(&str);

        bool plain = (m_plain || !desc.haveDescription());
        QString outputId = od.identifier.c_str();

        uint64_t featureNumber = m_count++;

        // need to write out feature timeline map -- for this we need
        // the sample rate, window length and hop size from the
        // transform

        stream << "\n:feature_timeline_" << featureNumber << " a tl:DiscreteTimeLine .\n\n";

        size_t stepSize = transform.getStepSize();
        if (stepSize == 0) {
            cerr << "RDFFeatureWriter: INTERNAL ERROR: writing dense features without having set the step size properly!" << endl;
            return;
        }

        size_t blockSize = transform.getBlockSize();
        if (blockSize == 0) {
            cerr << "RDFFeatureWriter: INTERNAL ERROR: writing dense features without having set the block size properly!" << endl;
            return;
        }

        float sampleRate = transform.getSampleRate();
        if (sampleRate == 0.f) {
            cerr << "RDFFeatureWriter: INTERNAL ERROR: writing dense features without having set the sample rate properly!" << endl;
            return;
        }

        stream << ":feature_timeline_map_" << featureNumber
               << " a tl:UniformSamplingWindowingMap ;\n"
               << "    tl:rangeTimeLine :feature_timeline_" << featureNumber << " ;\n"
               << "    tl:domainTimeLine " << timelineURI << " ;\n"
               << "    tl:sampleRate \"" << int(sampleRate) << "\"^^xsd:int ;\n"
               << "    tl:windowLength \"" << blockSize << "\"^^xsd:int ;\n"
               << "    tl:hopSize \"" << stepSize << "\"^^xsd:int .\n\n";

        stream << signalURI << " af:signal_feature :feature_"
               << featureNumber << " ." << endl << endl;

        stream << ":feature_" << featureNumber << " a ";

        QString signalTypeURI = desc.getOutputSignalTypeURI(outputId);
        if (plain || signalTypeURI == "") {
            if (m_syntheticSignalTypeURIs.find(transform) !=
                m_syntheticSignalTypeURIs.end()) {
                stream << m_syntheticSignalTypeURIs[transform] << " ;\n";
            } else {
                stream << ":signal_type_" << outputId << " ;\n";
            }
        } else {
            stream << "<" << signalTypeURI << "> ;\n";
        }

        stream << "    mo:time ["
               << "\n        a tl:Interval ;"
               << "\n        tl:onTimeLine :feature_timeline_" << featureNumber << " ;";

        RealTime startrt = transform.getStartTime();
        RealTime durationrt = transform.getDuration();

        int start = RealTime::realTime2Frame(startrt, sampleRate) / stepSize;
        int duration = RealTime::realTime2Frame(durationrt, sampleRate) / stepSize;

        if (start != 0) {
            stream << "\n        tl:start \"" << start << "\"^^xsd:int ;";
        }
        if (duration != 0) {
            stream << "\n        tl:duration \"" << duration << "\"^^xsd:int ;";
        }

        stream << "\n    ] ;\n";

        if (transform.getIdentifier() != "") {
            stream << "    vamp:computed_by " << m_transformURIs[transform] << " ;\n";
        }

        if (od.hasFixedBinCount) {
            // We only know the height, so write the width as zero
            stream << "    af:dimensions \"" << od.binCount << " 0\" ;\n";
        }

        stream << "    af:value \"";
    }

    QString &str = m_openDenseFeatures[sp].second;
    QTextStream stream(&str);

    for (int i = 0; i < featureList.size(); ++i) {

        const Plugin::Feature &feature = featureList[i];

        for (int j = 0; j < feature.values.size(); ++j) {
            stream << feature.values[j] << " ";
        }
    }
}

void RDFFeatureWriter::finish()
{
//    cerr << "RDFFeatureWriter::finish()" << endl;

    // close any open dense feature literals

    for (map<StringTransformPair, StreamBuffer>::iterator i =
             m_openDenseFeatures.begin();
         i != m_openDenseFeatures.end(); ++i) {
//        cerr << "closing a stream" << endl;
        StreamBuffer &b = i->second;
        *(b.first) << b.second << "\" ." << endl;
    }

    m_openDenseFeatures.clear();
    m_startedStreamTransforms.clear();

    FileFeatureWriter::finish();
}