view utilities/composer/composer.cpp @ 42:add3570c6035

* Add new "write" option to composer utility * Default year transcodes to empty string instead of zero
author Chris Cannam
date Tue, 20 Apr 2010 15:21:32 +0100
parents 07efb25d24d6
children bcea875d8d2f
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

#include "Objects.h"
#include "TypeRegistrar.h"

#include <dataquay/BasicStore.h>
#include <dataquay/TransactionalStore.h>
#include <dataquay/RDFException.h>
#include <dataquay/objectmapper/ObjectLoader.h>
#include <dataquay/objectmapper/ObjectStorer.h>
#include <dataquay/objectmapper/ObjectMapper.h>
#include <dataquay/objectmapper/TypeMapping.h>
#include <dataquay/Debug.h>

#include <QMultiMap>
#include <QFileInfo>

#include <iostream>

using namespace Dataquay;
using namespace ClassicalData;
using namespace std;

/**
 * Write a QString to a standard output stream, using the string's
 * local 8-bit representation.
 */
ostream &operator<<(ostream &target, const QString &str)
{
    target << str.toLocal8Bit().constData();
    return target;
}

/**
 * Write a QUrl to a standard output stream, as its string form
 * enclosed in angle brackets.
 */
ostream &operator<<(ostream &target, const QUrl &u)
{
    target << "<";
    target << u.toString();
    target << ">";
    return target;
}


/**
 * Import the RDF file with the given local file name into the store.
 *
 * The import is first attempted without naming a format.  If that
 * throws an RDFException, it is retried once with the format given
 * explicitly as "ntriples".  Progress and any failure messages are
 * written to stderr.
 *
 * Returns true if either attempt succeeded, false if both failed.
 */
bool
load(BasicStore *store, QString fileName)
{
    QUrl url = QUrl::fromLocalFile(fileName);

    cerr << "Importing from URL " << url << " ...";
    try {
        store->import(url, BasicStore::ImportPermitDuplicates);
    } catch (const RDFException &firstError) {
        // Caught by const reference so that the exception object is
        // neither copied nor sliced
        cerr << " retrying with explicit ntriples type...";
        try {
            store->import(url, BasicStore::ImportPermitDuplicates, "ntriples");
        } catch (const RDFException &retryError) {
            cerr << "failed" << endl;
            cerr << "Import failed: " << retryError.what() << endl;
            // Report the original failure as well: it describes what
            // went wrong before the ntriples fallback was tried
            cerr << "(first attempt failed with: " << firstError.what()
                 << ")" << endl;
            return false;
        }
    }

    cerr << " done" << endl;
    return true;
}

/**
 * Print a usage summary to stderr and terminate the program with
 * exit(-1).  The program name shown is the part of the given name
 * that follows its last '/' character (or the whole name if it
 * contains none).
 */
void
usage(char *name)
{
    // Advance past every directory separator to find the base name
    char *base = name;
    for (char *p = name; *p; ++p) {
        if (*p == '/') base = p + 1;
    }

    // One entry per supported invocation, printed after the base name
    static const char *const forms[] = {
        " <input-rdf-file> list",
        " <input-rdf-file> list-uris",
        " <input-rdf-file> show <uri> [<uri> ...]",
        " <input-rdf-file> search <text>",
        " <input-rdf-file> match <text>",
        " <input-rdf-file> merge <target-uri> <dup> [<dup> ...]",
        " <input-rdf-file> write"
    };
    const int formCount = int(sizeof(forms) / sizeof(forms[0]));

    cerr << "Usage:" << endl;
    for (int i = 0; i < formCount; ++i) {
        cerr << "  " << base << forms[i] << endl;
    }
    exit(-1);
}

// Every Composer found among the loaded objects; filled in by main()
// and consulted by the search, match and wildcard functions
static QList<Composer *> allComposers;

// For each composer, the set of works whose composition refers to
// that composer; filled in by main() and updated by merge()
static QMap<Composer *, QSet<Work *> > worksMap;

/**
 * Print a full description of the given composer to stdout: URI,
 * sort name, display dates, nationality and period, gender, number
 * of known works, aliases, remarks, associated pages and any other
 * URIs recorded for the composer.
 */
void
show(Composer *c)
{
    cout << c->property("uri").value<Uri>() << endl;
    cout << c->getSortName(true);

    QString dates = c->getDisplayDates();
    if (dates != "") cout << " (" << dates << ")";

    if (!c->nationality().empty() || c->period() != "") {
        cout << " [";
        bool first = true;
        foreach (QString n, c->nationality()) {
            if (!first) cout << "/";
            cout << n;
            first = false;
        }
        if (c->period() != "") {
            if (!first) cout << ", ";
            cout << c->period();
        }
        cout << "]";
    }

    if (c->gender() != "") {
        cout << " *" << c->gender();
    }

    // Look the works up with value() rather than operator[]: the
    // latter would insert an empty entry into worksMap for every
    // composer displayed, which a display function should not do
    const QSet<Work *> works = worksMap.value(c);
    if (!works.empty()) {
        cout << " [" << works.size() << " work(s)]";
    }
    cout << endl;

    foreach (QString a, c->aliases()) {
        cout << " - " << a << endl;
    }
    if (c->remarks() != "") {
        cout << " " << c->remarks() << endl;
    }
    foreach (Document *page, c->pages()) {
        cout << page->siteName() << " -> " << page->uri() << endl;
    }
    foreach (Uri u, c->otherURIs()) {
        cout << "Same as " << u << endl;
    }
}

/**
 * Print a short description of the given composer to stdout: URI on
 * one line, then sort name, display dates, nationality and period,
 * gender and number of known works on the next.
 */
void
showBrief(Composer *c)
{
    cout << c->property("uri").value<Uri>() << endl;
    cout << c->getSortName(false);

    QString dates = c->getDisplayDates();
    if (dates != "") cout << " (" << dates << ")";

    if (!c->nationality().empty() || c->period() != "") {
        cout << " [";
        bool first = true;
        foreach (QString n, c->nationality()) {
            if (!first) cout << "/";
            cout << n;
            first = false;
        }
        if (c->period() != "") {
            if (!first) cout << " ";
            cout << c->period();
        }
        cout << "]";
    }

    if (c->gender() != "") {
        cout << " *" << c->gender();
    }

    // Look the works up with value() rather than operator[]: the
    // latter would insert an empty entry into worksMap for every
    // composer displayed, which a display function should not do
    const QSet<Work *> works = worksMap.value(c);
    if (!works.empty()) {
        cout << " [" << works.size() << " work(s)]";
    }
    cout << endl;
}

/**
 * Print a brief description of each of the given composers, ordered
 * by the name returned from getSortName(false).
 */
void
listBrief(QList<Composer *> composers)
{
    typedef QMultiMap<QString, Composer *> NameMap;

    // The multimap keeps its entries ordered by key, and permits
    // several composers to share the same sort name
    NameMap byName;
    for (int i = 0; i < composers.size(); ++i) {
        Composer *composer = composers.at(i);
        byName.insert(composer->getSortName(false), composer);
    }

    for (NameMap::const_iterator it = byName.constBegin();
         it != byName.constEnd(); ++it) {
        showBrief(it.value());
    }
}

/**
 * Print the URI of each of the given composers to stdout, one per
 * line, in the order imposed by the multimap's key ordering.
 */
void
listUris(QList<Composer *> composers)
{
    typedef QMultiMap<Uri, Composer *> UriMap;

    UriMap byUri;
    for (int i = 0; i < composers.size(); ++i) {
        Composer *composer = composers.at(i);
        byUri.insert(composer->property("uri").value<Uri>(), composer);
    }

    // Walking the map visits every key (repeated keys included) in
    // ascending order
    for (UriMap::const_iterator it = byUri.constBegin();
         it != byUri.constEnd(); ++it) {
        cout << it.key() << endl;
    }
}

/**
 * Print search results from a map of score to composer, highest
 * score first.  Entries whose score is zero or less are not shown.
 * The first entry shown is printed in full as the "Best match"; any
 * further entries are printed briefly as other candidates.
 *
 * Printing stops once more than count entries have been shown, so
 * up to count + 1 entries may appear.
 * NOTE(review): confirm whether count is meant to exclude the best
 * match or whether this is an off-by-one.
 */
void
showSearchResults(QMultiMap<float, Composer *> matches, int count)
{
    typedef QMultiMap<float, Composer *>::const_iterator ResultIterator;

    int shown = 0;

    // Walk backwards from the end so that the highest scores come first
    ResultIterator it = matches.constEnd();
    while (it != matches.constBegin()) {
        --it;
        if (it.key() <= 0) continue;

        cout << endl;
        if (shown == 0) {
            cout << "Best match:" << endl;
        } else if (shown == 1) {
            cout << "Other candidate(s):" << endl;
        }

        cout << "[" << it.key() << "] ";
        if (shown == 0) {
            show(it.value());
        } else {
            showBrief(it.value());
        }

        ++shown;
        if (shown > count) break;
    }

    if (shown == 0) cout << "No matches" << endl;
    cout << endl;
}

/**
 * Score every known composer against the given typed text, first
 * with matchTypingQuick() and then with matchTyping(), printing the
 * results of each pass to stdout.
 */
void
search(QString typing)
{
    QMultiMap<float, Composer *> scored;

    cout << "Searching (quick) for: " << typing << endl;
    for (int i = 0; i < allComposers.size(); ++i) {
        Composer *composer = allComposers.at(i);
        float score = composer->matchTypingQuick(typing);
        scored.insert(score, composer);
    }
    showSearchResults(scored, 5);

    cout << "Searching (slow) for: " << typing << endl;
    scored.clear();
    for (int i = 0; i < allComposers.size(); ++i) {
        Composer *composer = allComposers.at(i);
        float score = composer->matchTyping(typing);
        scored.insert(score, composer);
    }
    showSearchResults(scored, 5);
}

/**
 * Score every known composer against the given name text using
 * matchFuzzyName(), and print the results to stdout.  The text is
 * lower-cased and split into elements at runs of full stops, commas,
 * spaces and hyphens before matching.
 */
void
match(QString text)
{
    cout << "Matching: " << text << endl;

    QRegExp separators("[\\., -]+");
    QStringList elements =
        text.toLower().split(separators, QString::SkipEmptyParts);

    QMultiMap<float, Composer *> scored;
    for (int i = 0; i < allComposers.size(); ++i) {
        Composer *composer = allComposers.at(i);
        float score = composer->matchFuzzyName(elements);
        scored.insert(score, composer);
    }

    showSearchResults(scored, 5);
}

/**
 * Return all known composers whose URI matches the given text, which
 * is treated as a case-insensitive wildcard pattern that must match
 * the whole URI.  Text containing neither '/' nor '*' is wrapped in
 * '*' on both sides, so that it matches anywhere within a URI.
 */
QList<Composer *>
matchWildcard(QString text)
{
    const bool hasSlash = text.contains('/');
    const bool hasStar = text.contains('*');
    if (!(hasSlash || hasStar)) {
        text = "*" + text + "*";
    }

    QRegExp pattern(text, Qt::CaseInsensitive, QRegExp::Wildcard);

    QList<Composer *> found;
    for (int i = 0; i < allComposers.size(); ++i) {
        Composer *composer = allComposers.at(i);
        QString uri = composer->property("uri").value<Uri>().toString();
        if (!pattern.exactMatch(uri)) continue;
        found.push_back(composer);
    }
    return found;
}

/**
 * Return the one composer whose URI matches the given URI or
 * wildcard text.  If there is no match, or more than one, a message
 * is written to stderr and 0 is returned.
 */
Composer *
matchSingle(QString text)
{
    QList<Composer *> candidates = matchWildcard(text);

    if (candidates.size() == 1) {
        return candidates.first();
    }

    if (candidates.empty()) {
        cerr << "matchSingle: No matches for " << text << endl;
    } else {
        cerr << "matchSingle: Multiple matches for " << text << endl;
    }
    return 0;
}

/**
 * Print a full description of every composer whose URI matches the
 * given URI or wildcard text, each followed by a blank line.
 */
void
showWildcard(QString text)
{
    cout << "Showing URI or wildcard: " << text << endl;
    cout << endl;

    const QList<Composer *> found = matchWildcard(text);
    for (int i = 0; i < found.size(); ++i) {
        show(found.at(i));
        cout << endl;
    }
}

/**
 * Merge each of the source composer records into the target record,
 * describing the records involved on stdout.
 *
 * For each source, the target absorbs its data through mergeFrom(),
 * every work recorded against the source in worksMap is reassigned
 * to the target, and the source composer (together with its birth
 * and death objects) is deleted.  Deleted composers are also removed
 * from allComposers so that no dangling pointers are left behind.
 *
 * A source that is the same object as the target, or that appears
 * more than once in the list, is reported on stderr and skipped:
 * processing it would delete the target or delete a record twice.
 *
 * The store argument is currently unused.
 */
void
merge(Composer *target, QList<Composer *> sources, BasicStore *store)
{
    (void)store;

    // Decide which sources to process before deleting anything, so
    // that no pointer to an already-deleted record is ever examined
    QList<Composer *> toMerge;
    foreach (Composer *c, sources) {
        if (c == target) {
            cerr << "merge: Skipping source "
                 << c->property("uri").value<Uri>()
                 << ": it is the merge target" << endl;
        } else if (toMerge.contains(c)) {
            cerr << "merge: Skipping source "
                 << c->property("uri").value<Uri>()
                 << ": it was listed more than once" << endl;
        } else {
            toMerge.push_back(c);
        }
    }

    cout << "Merging into this composer record:" << endl << endl;
    show(target);
    cout << endl << "... the following composer record(s):" << endl;

    foreach (Composer *c, toMerge) {
        cout << endl;
        show(c);
        target->mergeFrom(c);

        // Hand the source's works over to the target
        QSet<Work *> works = worksMap.value(c);
        foreach (Work *w, works) {
            w->composition()->setComposer(target);
        }
        worksMap[target].unite(works);
        worksMap.remove(c);

        // The record is about to be destroyed; make sure later
        // searches cannot find it
        allComposers.removeAll(c);

        delete c->birth();
        delete c->death();
        delete c;

        //!!! and composition events!
    }

    cout << endl << "Result after merging:" << endl << endl;
    show(target);
    cout << endl;
}

/**
 * Entry point for the composer utility.
 *
 * Usage: <program> <input-rdf-file> <command> [<args> ...]
 *
 * The command and its argument count are checked first; any problem
 * results in the usage message (which exits the program).  The RDF
 * file is then loaded into a store and mapped to objects, the
 * composer list and composer-to-works map are built, and the command
 * is carried out.
 *
 * The "merge" command runs with the objects managed by an
 * ObjectMapper over a TransactionalStore; its changes are committed
 * and the store is saved to out.ttl.  The "write" command stores all
 * loaded objects back through an ObjectStorer and saves the store to
 * out.ttl.  The other commands only print to stdout.
 *
 * Returns 0 on success, or 1 if the data could not be loaded or a
 * merge argument did not identify exactly one composer.
 */
int
main(int argc, char **argv)
{
    if (argc < 3) usage(argv[0]);
    QString inFileName = argv[1];
    QString command = argv[2];
    QStringList args;
    for (int i = 3; i < argc; ++i) {
        args.push_back(argv[i]);
    }

    // Check the command line up front, so that a mistyped command or
    // wrong argument count is reported immediately instead of after
    // the whole data file has been loaded (or, for an unknown command
    // with arguments, not at all).  usage() does not return.
    const bool takesNoArgs =
        (command == "write" ||
         command == "list" ||
         command == "list-uris");
    const bool takesArgs =
        (command == "show" ||
         command == "search" ||
         command == "match" ||
         command == "merge");
    if (!takesNoArgs && !takesArgs) usage(argv[0]);
    if (takesNoArgs && !args.empty()) usage(argv[0]);
    if (takesArgs && args.empty()) usage(argv[0]);
    if (command == "merge" && args.size() < 2) usage(argv[0]);

    BasicStore *store = new BasicStore();
    store->setBaseUri(Uri("http://dbtune.org/classical/resource/"));
    ObjectLoader *loader = new ObjectLoader(store);
//    loader->setFollowPolicy(ObjectLoader::FollowObjectProperties);

    TypeMapping tm;

    TypeRegistrar::registerTypes();
    TypeRegistrar::addMappings(store, &tm);

    loader->setTypeMapping(tm);

    if (!load(store, inFileName)) {
        cerr << "Failed to load data source" << endl;
        return 1;
    }

    cerr << "Imported RDF data, mapping to objects...";
    QObjectList objects = loader->loadAll();
    cerr << " done" << endl;

    delete loader;

    // "merge" writes its changes back through a managing mapper;
    // "write" re-stores every loaded object
    const bool write = (command == "merge");
    const bool writeFull = (command == "write");

    TransactionalStore *ts = 0;
    ObjectMapper *mapper = 0;

    if (write) {
        cerr << "Managing objects...";
        ts = new TransactionalStore(store);
        mapper = new ObjectMapper(ts);
        mapper->setTypeMapping(tm);
        mapper->manage(objects);
        cerr << " done" << endl;
    }

    foreach (QObject *o, objects) {
        Composer *c = qobject_cast<Composer *>(o);
        if (c) allComposers.push_back(c);
    }

    QList<Work *> works;
    foreach (QObject *o, objects) {
        Work *w = qobject_cast<Work *>(o);
        if (w) works.push_back(w);
    }

    // Record each work against the composer of its composition, for
    // those works that have both
    foreach (Work *w, works) {
        Composition *c = w->composition();
        if (c) {
            Composer *cp = c->composer();
            if (cp) worksMap[cp].insert(w);
        }
    }

    // Carry out the command ("write" has nothing to do at this stage)
    if (command == "list") {
        listBrief(allComposers);
    } else if (command == "list-uris") {
        listUris(allComposers);
    } else if (command == "show") {
        foreach (QString s, args) {
            showWildcard(s);
        }
    } else if (command == "search") {
        foreach (QString s, args) {
            search(s);
        }
    } else if (command == "match") {
        foreach (QString s, args) {
            match(s);
        }
    } else if (command == "merge") {
        Composer *target = matchSingle(args[0]);
        if (!target) return 1;
        QList<Composer *> sources;
        for (int i = 1; i < args.size(); ++i) {
            Composer *c = matchSingle(args[i]);
            if (!c) return 1;
            sources.push_back(c);
        }
        merge(target, sources, store);
    }

    if (write) {

        cerr << "Committing changes...";
        mapper->commit();
        cerr << " done" << endl;

        cerr << "Saving to file out.ttl...";
        store->save("out.ttl");
        cerr << " done" << endl;

    } else if (writeFull) {

        ObjectStorer *storer = new ObjectStorer(store);

        storer->setTypeMapping(tm);

        storer->setPropertyStorePolicy(ObjectStorer::StoreIfChanged);
        storer->setBlankNodePolicy(ObjectStorer::NoBlankNodes);

        cerr << "Mapping results back to store...";
        storer->setFollowPolicy(ObjectStorer::FollowObjectProperties);
        storer->store(objects);
        cerr << " done" << endl;

        cerr << "Saving to file out.ttl...";
        store->save("out.ttl");
        cerr << " done" << endl;

        delete storer;
    }

    delete mapper;
    delete ts;

    delete store;

    return 0;
}