diff import/Test.cpp @ 0:e8f4c2b55fd8 classical-rdf

* reorganise
author Chris Cannam
date Tue, 01 Dec 2009 17:50:41 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/import/Test.cpp	Tue Dec 01 17:50:41 2009 +0000
@@ -0,0 +1,831 @@
+/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
+
+#include "Objects.h"
+
+#include <dataquay/BasicStore.h>
+#include <dataquay/RDFException.h>
+#include <dataquay/objectmapper/ObjectMapper.h>
+#include <dataquay/objectmapper/ObjectBuilder.h>
+#include <dataquay/objectmapper/ContainerBuilder.h>
+
+#include "ImportClassicalComposersOrg.h"
+#include "ImportClassicalDotNet.h"
+#include "ImportWikipediaComposers.h"
+#include "ImportWikipediaWorks.h"
+#include "ImportWikipediaWorksK.h"
+#include "ImportWikipediaWorksList.h"
+#include "ImportHoboken.h"
+
+#include <dataquay/Debug.h>
+
+using namespace ClassicalData;
+using namespace Dataquay;
+
+#include <iostream>
+#include <set>
+
+typedef QMap<QString, QSet<Composer *> > ComposerMap; // lower-cased name, alias or "birth-death" date key -> composers filed under that key
+
+// Decide whether the birth and death years recorded for two composers
+// are compatible with their being the same person.
+//
+// An event (birth or death) is only compared when both composers have
+// it. Years at most one apart always agree; years at most ten apart
+// agree provided at least one of the two dates is flagged as
+// approximate; anything further apart is a mismatch.
+//
+// Returns true if neither event rules the pairing out, which includes
+// the case where there is nothing to compare.
+bool datesMatch(Composer *a, Composer *b)
+{
+    if (a->birth() && b->birth() &&
+        abs(a->birth()->year() - b->birth()->year()) > 1) {
+        const bool bothExact =
+            !a->birth()->approximate() && !b->birth()->approximate();
+        if (bothExact ||
+            abs(a->birth()->year() - b->birth()->year()) > 10) {
+            return false;
+        }
+    }
+
+    if (a->death() && b->death() &&
+        abs(a->death()->year() - b->death()->year()) > 1) {
+        const bool bothExact =
+            !a->death()->approximate() && !b->death()->approximate();
+        if (bothExact ||
+            abs(a->death()->year() - b->death()->year()) > 10) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+// Add extra aliases to composer c, all derived from its name:
+//  - a few hard-coded special cases;
+//  - the "Forenames Surname" form of a "Surname, Forenames" name,
+//    keeping any Sr./Jr./I/II suffix at the very end;
+//  - the same with "Sergey" respelt as "Sergei";
+//  - forms with senior/junior style suffixes replaced by " I"/" II";
+//  - a form with bracketed numerals (I), (II), (III) unbracketed.
+// The last of these is added unconditionally, even if nothing changed.
+void
+addMiscExpansions(Composer *c)
+{
+    QString name = c->name();
+
+    DEBUG << "addMiscExpansions: n = " << name << endl;
+
+    // lovely hard-coded special cases go here! some of these are
+    // needed for works->composer assignments
+    if (name == "Balakirev, Milii") {
+        c->addAlias("Mily Balakirev");
+    }
+    if (name.startsWith("Cui, C")) {
+        c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
+    }
+    if (name == "Handel, George Frideric") {
+        c->addAlias("Handel, Georg Friedrich");
+        c->addAlias("Handel");
+    }
+    if (name == "Mayr, Simon") {
+        c->addAlias("Mayr");
+    }
+
+    // Stop the comma before a generational suffix from being taken
+    // for the surname/forename separator below
+    name.replace(", Sr.", " Sr.");
+    name.replace(", Jr.", " Jr.");
+
+    const int sep = name.indexOf(", ");
+    if (sep > 0 && sep + 2 < name.length()) {
+
+        QString surname = name.left(sep);
+        QString forenames = name.mid(sep + 2);
+
+        // A suffix trailing the forenames belongs after the surname
+        // once the two halves have been swapped
+        QRegExp suffix("( (Sr\\.|Jr\\.|I|II))$");
+        if (suffix.indexIn(forenames) >= 0) {
+            surname += suffix.cap(1);
+            forenames = forenames.left
+                (forenames.length() - suffix.matchedLength());
+        }
+        name = forenames + " " + surname;
+    }
+
+    if (name != c->name()) c->addAlias(name);
+
+    if (name.contains("Sergey")) {
+        QString respelt(name);
+        respelt.replace("Sergey", "Sergei");
+        c->addAlias(respelt);
+    }
+
+    QRegExp senior("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
+    const int seniorAt = senior.indexIn(name);
+    if (seniorAt >= 0) {
+        QString numbered = name;
+        numbered.replace(seniorAt, senior.matchedLength(), " I");
+        numbered.replace("  ", " ");
+        DEBUG << "addMiscExpansions: trying " << numbered << " for " << name << endl;
+        c->addAlias(numbered);
+    }
+
+    QRegExp junior("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
+    const int juniorAt = junior.indexIn(name);
+    if (juniorAt >= 0) {
+        QString numbered = name;
+        numbered.replace(juniorAt, junior.matchedLength(), " II");
+        numbered.replace("  ", " ");
+        DEBUG << "addMiscExpansions: trying " << numbered << " for " << name << endl;
+        c->addAlias(numbered);
+    }
+
+    QString unbracketed = name;
+    unbracketed.replace("(I)", "I");
+    unbracketed.replace("(II)", "II");
+    unbracketed.replace("(III)", "III");
+    c->addAlias(unbracketed);
+}
+
+// Loosely decide whether the name an could refer to composer b.
+//
+// An exact match against b's name or any of b's aliases succeeds
+// outright. Otherwise both names have their commas removed and are
+// split into words on spaces and hyphens. The match then succeeds
+// only if the two surnames are identical and at least two eligible
+// words of an (the surname being one of them) also occur among the
+// words of b's name.
+//
+// The surname is taken to be the first word of a name that contained
+// a comma ("Surname, Forenames") and the last word otherwise. A word
+// is eligible only if it starts with an upper-case letter and is not
+// "Della". Empty words, which arise from consecutive separators or
+// from an empty name, are never eligible and are never indexed.
+bool namesFuzzyMatch(QString an, Composer *b)
+{
+    // ew!
+
+    QString bn = b->name();
+    if (bn == an) return true;
+    if (b->aliases().contains(an)) return true;
+
+    const bool aSurnameFirst = an.contains(",");
+    const bool bSurnameFirst = bn.contains(",");
+    an.replace(",", "");
+    bn.replace(",", "");
+
+    const QStringList nl = an.split(QRegExp("[ -]"));
+    const QStringList bnl = bn.split(QRegExp("[ -]"));
+    if (nl.isEmpty() || bnl.isEmpty()) return false;
+
+    const QString aSurname = aSurnameFirst ? nl.first() : nl.last();
+    const QString bSurname = bSurnameFirst ? bnl.first() : bnl.last();
+
+    QString surnameMatch;
+    if (!aSurname.isEmpty() &&
+        aSurname[0].isUpper() &&
+        aSurname != "Della" &&
+        aSurname == bSurname) {
+        surnameMatch = aSurname;
+    }
+
+    int matchCount = 0;
+    foreach (QString elt, nl) {
+        // check for emptiness before looking at the first character
+        if (elt.isEmpty() || !elt[0].isUpper() || elt == "Della") continue;
+        if (bnl.contains(elt)) ++matchCount;
+    }
+
+    if (matchCount > 1 && !surnameMatch.isEmpty()) {
+        DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
+        return true;
+    }
+    return false;
+}
+
+// Guess whether c's name is more likely than other's to be a good
+// "canonical form" of the composer's name. Returns true if c's name
+// should be preferred, false if other's should be kept (including
+// when the two names are identical).
+bool
+hasBetterName(Composer *c, Composer *other)
+{
+    const QString mine = c->name();
+    const QString theirs = other->name();
+
+    if (mine == theirs) return false;
+
+    // A name sorted under "van " is the wrong choice of sort for
+    // e.g. LvB; should be Beethoven, Ludwig van, not van Beethoven,
+    // Ludwig -- so whichever name starts that way loses
+    if (mine.startsWith("van ")) return false;
+    if (theirs.startsWith("van ")) return true;
+
+    // a rather weak heuristic: more aliases wins
+    const int myAliasCount = c->aliases().size();
+    const int theirAliasCount = other->aliases().size();
+    if (myAliasCount != theirAliasCount) {
+        return myAliasCount > theirAliasCount;
+    }
+
+    // another rather weak heuristic: a name with a comma beats one
+    // without
+    return mine.contains(',') && !theirs.contains(',');
+}
+
+// Merge composer c into the index composers.
+//
+// The index maps lower-cased names, aliases and date keys ("birth-death"
+// or "birth-") to the composers filed under them. If some composer
+// already in the index is judged to be the same person as c, then c's
+// name, aliases, pages, dates and descriptive fields are folded into
+// that existing composer and c itself is not added to the index.
+// Otherwise c is filed under every one of its own keys.
+//
+// A candidate found through the date key must also fuzzy-match c by
+// name (in either direction); a candidate found through a name or
+// alias must pass datesMatch(). A composer with neither birth nor
+// death that matched nothing this way is compared by fuzzy name
+// against every composer in the index, and the first hit is used.
+void mergeComposer(Composer *c, ComposerMap &composers)
+{
+    QString name = c->name();
+
+    // every key under which c could be looked up: name, aliases, dates
+    QSet<QString> allNames = c->aliases();
+    allNames.insert(name);
+    
+    // the date key is only built when a birth is known
+    QString dates;
+    if (c->birth()) {
+        if (c->death()) {
+            dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
+        } else {
+            dates = QString("%1-").arg(c->birth()->year());
+        }
+    }
+    if (dates != "") {
+        allNames.insert(dates);
+    }
+
+    QSet<Composer *> matches;
+
+    foreach (QString candidateName, allNames) {
+        QString key = candidateName.toLower();
+        if (composers.contains(key)) {
+            foreach (Composer *candidate, composers[key]) {
+                if (candidateName == dates) {
+                    // same dates alone are not enough: the names must
+                    // also be similar
+                    if (!namesFuzzyMatch(c->name(), candidate) &&
+                        !namesFuzzyMatch(candidate->name(), c)) {
+                        DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
+                        continue;
+                    } else {
+                        DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
+                    }
+                } else {
+                    // same name alone is not enough: the dates must
+                    // not contradict each other
+                    if (!datesMatch(c, candidate)) {
+                        DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
+                        continue;
+                    }
+                }
+                matches.insert(candidate);
+            }
+        }
+    }
+
+    if (matches.empty()) {
+        DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
+
+        if (!c->birth() && !c->death()) {
+            // laboriously look for fuzzy match across _all_ composers
+            for (ComposerMap::iterator i = composers.begin();
+                 i != composers.end(); ++i) {
+                foreach (Composer *candidate, *i) {
+                    if (namesFuzzyMatch(c->name(), candidate)) {
+                        DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
+                        matches.insert(candidate);
+                        break;
+                    }
+                }
+                if (!matches.empty()) break;
+            }
+        }
+
+        // still nothing: c is a new composer, file it under all its keys
+        if (matches.empty()) {
+            foreach (QString candidateName, allNames) {
+                composers[candidateName.toLower()].insert(c);
+                DEBUG << "added for alias or date " << candidateName << endl;
+            }
+            return;
+        }
+    }
+
+    if (matches.size() > 1) {
+        DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
+    }
+
+    // with several matches, only the one the set happens to yield
+    // first is merged into; the others are left untouched
+    Composer *other = *matches.begin();
+
+    DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
+
+    // keep whichever name looks more canonical; the loser becomes an alias
+    if (hasBetterName(c, other)) {
+        other->addAlias(other->name());
+        other->setName(c->name());
+    } else {
+        other->addAlias(c->name());
+    }
+    composers[c->name().toLower()].insert(other);
+    DEBUG << "linking from alias " << c->name() << endl;
+
+    // NOTE(review): c's date key is not added to the index for other
+    // here, only c's name and aliases are -- confirm that is intended
+    foreach (QString alias, c->aliases()) {
+        if (alias != other->name() && 
+            !other->aliases().contains(alias)) {
+            other->addAlias(alias);
+            composers[alias.toLower()].insert(other);
+            DEBUG << "linking from alias " << alias << endl;
+        }
+    }
+    
+    // move over any page of c whose URI other does not already have
+    foreach (Document *d, c->pages()) {
+        bool found = false;
+        foreach (Document *dd, other->pages()) {
+            if (d->uri() == dd->uri()) {
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            d->setTopic(other);
+            other->addPage(d);
+        }
+    }
+
+    //!!! actually the "approximate" bits of the following are bogus;
+    // a source reporting birth or death date as approx is probably
+    // more accurate than one reporting an exact date
+
+    if (c->birth()) {
+        if (!other->birth() || other->birth()->approximate()) {
+            other->setBirth(c->birth());
+        }
+    }
+
+    if (c->death()) {
+        if (!other->death() || other->death()->approximate()) {
+            other->setDeath(c->death());
+        }
+    }
+
+    // any non-empty descriptive field of c overwrites other's value
+    if (c->gender() != "") other->setGender(c->gender());
+    if (c->nationality() != "") other->setNationality(c->nationality());
+    if (c->remarks() != "") other->setRemarks(c->remarks());
+    if (c->period() != "") other->setPeriod(c->period());
+
+}
+    
+// Return an "ascii version" of field for dumb search purposes.
+//
+// Each character that has a Unicode decomposition is replaced by the
+// first character of that decomposition; the sharp s (U+00DF) becomes
+// "ss"; everything else is copied through. Afterwards the typographic
+// apostrophe U+2019 becomes ' and the dashes U+2012 to U+2015 become -.
+QString
+asciify(QString field)
+{
+    QString plain;
+    const int length = field.length();
+    for (int pos = 0; pos < length; ++pos) {
+        const QChar ch = field.at(pos);
+        const QString decomposed = ch.decomposition();
+        if (decomposed != "") {
+            plain += decomposed[0];
+        } else if (ch == QChar(0x00DF)) {
+            plain += "ss";
+        } else {
+            plain += ch;
+        }
+    }
+
+    plain.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
+
+    // figure dash, en dash, em dash, horizontal bar
+    static const char *const dashes[] = {
+        "\342\200\222", "\342\200\223", "\342\200\224", "\342\200\225"
+    };
+    for (int d = 0; d < 4; ++d) {
+        plain.replace(QString::fromUtf8(dashes[d]), "-");
+    }
+
+    return plain;
+}
+
+// Give composer c an ascii alias for its name and for each of its
+// aliases, wherever the ascii form differs from the original and is
+// not already among c's aliases.
+void
+asciify(Composer *c)
+{
+    const QString name = c->name();
+    const QString plainName = asciify(name);
+    if (plainName != name && !c->aliases().contains(plainName)) {
+        c->addAlias(plainName);
+    }
+
+    foreach (QString alias, c->aliases()) {
+        const QString plainAlias = asciify(alias);
+        if (plainAlias == alias) continue;
+        if (c->aliases().contains(plainAlias)) continue;
+        c->addAlias(plainAlias);
+    }
+}
+
+// Give work w an ascii alias for its name and for each of its
+// aliases, wherever the ascii form differs from the original and is
+// not already among w's aliases.
+void
+asciify(Work *w)
+{
+    const QString title = w->name();
+    const QString plainTitle = asciify(title);
+    if (plainTitle != title && !w->aliases().contains(plainTitle)) {
+        w->addAlias(plainTitle);
+    }
+
+    foreach (QString alias, w->aliases()) {
+        const QString plainAlias = asciify(alias);
+        if (plainAlias == alias) continue;
+        if (w->aliases().contains(plainAlias)) continue;
+        w->addAlias(plainAlias);
+    }
+}
+
+// Set the "uri" property of composer c to a URI of the form
+// :composer_<slug>, expanded through store s.
+//
+// The slug is built from the composer's name: a name without a comma
+// has its last word moved to the front; the result is asciified,
+// spaces and hyphens become underscores, any other character outside
+// [a-zA-Z0-9_-] is dropped, and the whole is lower-cased. Slugs
+// already handed out by this function get "__<n>" appended, with n
+// counting up from 1 until the slug is unused.
+void
+assignUri(Store *s, Composer *c)
+{
+    static QSet<QString> taken;
+
+    QString slug = c->name();
+    if (!slug.contains(",")) {
+        QStringList words = slug.split(" ");
+        if (!words.empty()) {
+            const QString lastWord = words.takeLast();
+            words.prepend(lastWord);
+            slug = words.join(" ");
+            DEBUG << "assignUri: " << c->name() << " -> " << slug << endl;
+        }
+    }
+
+    slug = asciify(slug);
+    slug.replace(" ", "_");
+    slug.replace("-", "_");
+    slug.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
+    slug = slug.toLower();
+
+    const QString base = slug;
+    for (int suffix = 1; taken.contains(slug); ++suffix) {
+        slug = QString("%1__%2").arg(base).arg(suffix);
+    }
+    taken.insert(slug);
+
+    c->setProperty("uri", s->expand(":composer_" + slug));
+}
+
+// Set the "uri" property of work w to a URI of the form :work_<slug>,
+// expanded through store s.
+//
+// The slug starts from the work's catalogue entry, or failing that
+// its opus, with full stops removed; if both are empty the work's
+// name is used instead. "_no<number>" is appended when the work has a
+// number, and the composer's own slug (the part of c's "uri" property
+// after "composer_") is prefixed when there is one. The result is
+// then asciified, spaces, hyphens and colons become underscores, any
+// other character outside [a-zA-Z0-9_-] is dropped, and the whole is
+// lower-cased.
+void
+assignUri(Store *s, Work *w, Composer *c)
+{
+    static QSet<QString> taken;
+
+    QString pfx = c->property("uri").toUrl().toString();
+    DEBUG << "pfx = " << pfx << endl;
+    if (pfx.contains("composer_")) {
+        pfx.replace(QRegExp("^.*composer_"), "");
+    } else {
+        pfx = "";
+    }
+
+    QString slug = w->catalogue();
+    if (slug == "") slug = w->opus();
+    slug = slug.replace(".", "");
+
+    const bool hasOpus = (slug != "");
+    if (!hasOpus) slug = w->name();
+
+    if (w->number() != "") slug = slug + "_no" + w->number();
+    if (pfx != "") slug = pfx + "_" + slug;
+
+    slug = asciify(slug);
+    slug.replace(" ", "_");
+    slug.replace("-", "_");
+    slug.replace(":", "_");
+    slug.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
+    slug = slug.toLower();
+
+    // I think actually for works we want to merge duplicates rather than
+    // assign them separate URIs, _unless_ they lack a viable opus number
+    if (!hasOpus) {
+        const QString base = slug;
+        for (int suffix = 1; taken.contains(slug); ++suffix) {
+            slug = QString("%1__%2").arg(base).arg(suffix);
+        }
+    }
+    taken.insert(slug);
+
+    w->setProperty("uri", s->expand(":work_" + slug));
+}
+
+// If s is an English Wikipedia article URL, add two triples about
+// object o to store: a mo:wikipedia link to the article itself, and
+// an owl:sameAs link to the URL with the Wikipedia prefix replaced by
+// the DBpedia resource prefix. Does nothing if o has no "uri"
+// property value or s is not such a URL.
+void
+addDbpediaResource(Store *store, QObject *o, QString s)
+{
+    const QString wikipediaPrefix("http://en.wikipedia.org/wiki/");
+    const QString dbpediaPrefix("http://dbpedia.org/resource/");
+
+    QUrl u = o->property("uri").toUrl();
+    if (u == QUrl()) return;
+    if (!s.startsWith(wikipediaPrefix)) return;
+
+    store->add(Triple(u, "mo:wikipedia", QUrl(s)));
+
+    s.replace(wikipediaPrefix, dbpediaPrefix);
+    store->add(Triple(u, "owl:sameAs", QUrl(s)));
+}
+
+// Entry point. Loads the importer objects described in importers.ttl,
+// collects the composers, works and compositions they yield, merges
+// duplicate composers, assigns URIs to composers and works, stores
+// the result through an ObjectMapper into test-out.ttl, and finally
+// prints a list of composers and of works by composer to stdout.
+int main(int argc, char **argv)
+{
+    // Make the data and importer pointer types, and the set types
+    // used for their properties, known to the Qt meta-type system
+    qRegisterMetaType<HistoricalEvent *>
+        ("ClassicalData::HistoricalEvent*");
+    qRegisterMetaType<Birth *>
+        ("ClassicalData::Birth*");
+    qRegisterMetaType<Death *>
+        ("ClassicalData::Death*");
+    qRegisterMetaType<Composition *>
+        ("ClassicalData::Composition*");
+    qRegisterMetaType<Work *>
+        ("ClassicalData::Work*");
+    qRegisterMetaType<Movement *>
+        ("ClassicalData::Movement*");
+    qRegisterMetaType<Composer *>
+        ("ClassicalData::Composer*");
+    qRegisterMetaType<Document *>
+        ("ClassicalData::Document*");
+    qRegisterMetaType<Form *>
+        ("ClassicalData::Form*");
+    qRegisterMetaType<QSet<Work *> >
+        ("QSet<ClassicalData::Work*>");
+    qRegisterMetaType<QSet<Movement *> >
+        ("QSet<ClassicalData::Movement*>");
+    qRegisterMetaType<QSet<Document *> >
+        ("QSet<ClassicalData::Document*>");
+    qRegisterMetaType<QSet<Form *> >
+        ("QSet<ClassicalData::Form*>");
+    qRegisterMetaType<QSet<QString> >
+        ("QSet<QString>");
+
+    qRegisterMetaType<ClassicalComposersOrgImporter *>
+        ("ClassicalData::ClassicalComposersOrgImporter*");
+    qRegisterMetaType<ClassicalDotNetImporter *>
+        ("ClassicalData::ClassicalDotNetImporter*");
+    qRegisterMetaType<WikipediaComposersImporter *>
+        ("ClassicalData::WikipediaComposersImporter*");
+    qRegisterMetaType<WikipediaWorksImporter *>
+        ("ClassicalData::WikipediaWorksImporter*");
+    qRegisterMetaType<WikipediaWorksKImporter *>
+        ("ClassicalData::WikipediaWorksKImporter*");
+    qRegisterMetaType<WikipediaWorksListImporter *>
+        ("ClassicalData::WikipediaWorksListImporter*");
+    qRegisterMetaType<HobokenImporter *>
+        ("ClassicalData::HobokenImporter*");
+
+    // Register the same classes with the Dataquay object builder
+    ObjectBuilder::getInstance()->registerClass
+        <HistoricalEvent>("ClassicalData::HistoricalEvent*");
+    ObjectBuilder::getInstance()->registerClass
+        <Birth>("ClassicalData::Birth*");
+    ObjectBuilder::getInstance()->registerClass
+        <Death>("ClassicalData::Death*");
+    ObjectBuilder::getInstance()->registerClass
+        <Composition>("ClassicalData::Composition*");
+    ObjectBuilder::getInstance()->registerClass
+        <Work, QObject>("ClassicalData::Work*");
+    ObjectBuilder::getInstance()->registerClass
+        <Movement, QObject>("ClassicalData::Movement*");
+    ObjectBuilder::getInstance()->registerClass
+        <Composer, QObject>("ClassicalData::Composer*");
+    ObjectBuilder::getInstance()->registerClass
+        <Document, QObject>("ClassicalData::Document*");
+    ObjectBuilder::getInstance()->registerClass
+        <Form, QObject>("ClassicalData::Form*");
+
+    ObjectBuilder::getInstance()->registerClass
+        <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
+    ObjectBuilder::getInstance()->registerClass
+        <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
+    ObjectBuilder::getInstance()->registerClass
+        <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
+    ObjectBuilder::getInstance()->registerClass
+        <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
+    ObjectBuilder::getInstance()->registerClass
+        <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
+    ObjectBuilder::getInstance()->registerClass
+        <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
+    ObjectBuilder::getInstance()->registerClass
+        <HobokenImporter>("ClassicalData::HobokenImporter*");
+
+    // ... and the set containers with the container builder
+    ContainerBuilder::getInstance()->registerContainer
+        <QString, QSet<QString> >
+        ("QString", "QSet<QString>", ContainerBuilder::SetKind);
+
+    ContainerBuilder::getInstance()->registerContainer
+        <Work*, QSet<Work*> >
+        ("ClassicalData::Work*", "QSet<ClassicalData::Work*>",
+         ContainerBuilder::SetKind);
+
+    ContainerBuilder::getInstance()->registerContainer
+        <Movement*, QSet<Movement*> >
+        ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>",
+         ContainerBuilder::SetKind);
+
+    ContainerBuilder::getInstance()->registerContainer
+        <Document*, QSet<Document*> >
+        ("ClassicalData::Document*", "QSet<ClassicalData::Document*>",
+         ContainerBuilder::SetKind);
+
+    ContainerBuilder::getInstance()->registerContainer
+        <Form*, QSet<Form*> >
+        ("ClassicalData::Form*", "QSet<ClassicalData::Form*>",
+         ContainerBuilder::SetKind);
+
+    // Input side: the importers are themselves objects described in
+    // importers.ttl and are instantiated by the mapper.
+    // NOTE(review): store and parentObject are never deleted here --
+    // confirm whether anything else takes ownership of them
+    BasicStore *store = BasicStore::load("file:importers.ttl");
+    ObjectMapper mapper(store);
+    QObject *parentObject = mapper.loadAllObjects(new QObject());
+
+    // Output side: a fresh store with its own mapper
+    BasicStore *outstore = new BasicStore();
+    ObjectMapper outmapper(outstore);
+
+    outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
+
+    outstore->addPrefix("type", outmapper.getObjectTypePrefix());
+    outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "ClassicalData/");
+    outstore->addPrefix("property", outmapper.getPropertyPrefix());
+    outstore->addPrefix("rel", outmapper.getRelationshipPrefix());
+    outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/");
+    outstore->addPrefix("mo", "http://purl.org/ontology/mo/");
+    outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/");
+    outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/");
+    outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#");
+    outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
+
+    // Map object properties and types onto existing vocabularies
+    outmapper.addPropertyMapping("ClassicalData::Composer", "pages",
+                                 outstore->expand("foaf:page"));
+    outmapper.addPropertyMapping("ClassicalData::Composer", "name",
+                                 outstore->expand("foaf:name"));
+    outmapper.addPropertyMapping("ClassicalData::Composer", "aliases",
+                                 outstore->expand("property:also_known_as"));
+    outmapper.addPropertyMapping("ClassicalData::Document", "topic",
+                                 outstore->expand("foaf:primaryTopic"));
+
+    outmapper.addTypeMapping("ClassicalData::Work",
+                             outstore->expand("mo:MusicalWork"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "composition",
+                                 outstore->expand("mo:composed_in"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "opus",
+                                 outstore->expand("mo:opus"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "k6",
+                                 outstore->expand("mo:k6"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "bwv",
+                                 outstore->expand("mo:bwv"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "number",
+                                 outstore->expand("mo:number"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "partOf",
+                                 outstore->expand("dc:isPartOf"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "parts",
+                                 outstore->expand("dc:hasPart"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "pages",
+                                 outstore->expand("foaf:page"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "forms",
+                                 outstore->expand("property:form"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "key",
+                                 outstore->expand("mo:key"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "aliases",
+                                 outstore->expand("property:also_known_as"));
+    outmapper.addPropertyMapping("ClassicalData::Work", "name",
+                                 outstore->expand("dc:title"));
+
+    outmapper.addTypeMapping("ClassicalData::Composition",
+                             outstore->expand("mo:Composition"));
+    outmapper.addPropertyMapping("ClassicalData::Composition", "composer",
+                                 outstore->expand("mo:composer"));
+    outmapper.addPropertyMapping("ClassicalData::Composition", "works",
+                                 outstore->expand("mo:produced_work"));
+
+    outstore->add(Triple("classical:Composer", "a",
+                         outstore->expand("owl:Class")));
+    outstore->add(Triple("classical:Composer", "rdfs:subClassOf",
+                         outstore->expand("mo:MusicArtist")));
+
+    QList<Importer *> importers = parentObject->findChildren<Importer *>();
+    std::cerr << "have " << importers.size() << " importers" << std::endl;
+
+    ComposerMap composers;
+
+    QList<Composer *> dated;
+    QList<Composer *> undated;
+
+    QList<Work *> works;
+    QList<Composition *> compositions;
+
+    // Sort the imported objects by type, expanding aliases as we go
+    foreach (Importer *importer, importers) {
+        QObjectList objects = importer->getImportedObjects();
+        foreach (QObject *o, objects) {
+            Composer *c;
+            if ((c = qobject_cast<Composer *>(o))) {
+                addMiscExpansions(c);
+                asciify(c);
+                if (c->birth() || c->death()) dated.push_back(c);
+                else undated.push_back(c);
+                continue;
+            }
+            Work *w;
+            if ((w = qobject_cast<Work *>(o))) {
+                asciify(w);
+                works.push_back(w);
+                continue;
+            }
+            Composition *cn;
+            if ((cn = qobject_cast<Composition *>(o))) {
+                compositions.push_back(cn);
+                continue;
+            }
+        }
+    }
+
+    // get all the dated composers merged before attempting to match
+    // the undated ones
+    foreach (Composer *c, dated) {
+        mergeComposer(c, composers);
+    }
+    foreach (Composer *c, undated) {
+        mergeComposer(c, composers);
+    }
+
+    QObjectList toStore;
+
+    // A composer is filed in the index under several keys; assign its
+    // URI and add its Wikipedia/DBpedia links the first time it is
+    // met only, rather than once for every key
+    QSet<Composer *> cset;
+    for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
+        foreach (Composer *c, i.value()) {
+            if (cset.contains(c)) continue;
+            assignUri(outstore, c);
+            toStore.push_back(c);
+            cset.insert(c);
+            foreach (Document *d, c->pages()) {
+                QString s = d->uri().toString();
+                addDbpediaResource(outstore, c, s);
+            }
+        }
+    }
+
+    QSet<QString> storedUris;
+
+    foreach (Work *w, works) {
+        Composition *cn = w->composition();
+        if (!cn) continue;
+
+        // Resolve a composition that only carries a composer name to
+        // the composer filed under that name, if there is exactly one
+        if (!cn->composer()) {
+            QString cname = cn->composerName();
+            if (cname != "") {
+                if (!composers.contains(cname.toLower())) {
+                    DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
+                } else {
+                    QSet<Composer *> cs = composers[cname.toLower()];
+                    if (cs.empty()) {
+                        DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
+                    } else if (cs.size() > 1) {
+                        DEBUG << "Failed to assign Composition to composer: "
+                              << cs.size() << " composers match name " << cname << endl;
+                    } else {
+                        cn->setComposer(*cs.begin());
+                    }
+                }
+            } else {
+                DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
+            }
+        }
+
+        if (cn->composer()) {
+            assignUri(outstore, w, cn->composer());
+        }
+
+        foreach (Document *d, w->pages()) {
+            QString s = d->uri().toString();
+            addDbpediaResource(outstore, w, s);
+            toStore.push_back(d);
+        }
+
+        // Works sharing a URI are stored once; works without a URI
+        // are always stored
+        QString u = w->property("uri").toUrl().toString();
+        if (u == "" || !storedUris.contains(u)) {
+            toStore.push_back(w);
+            if (u != "") storedUris.insert(u);
+        }
+    }
+
+    try {
+        outmapper.storeAllObjects(toStore);
+
+    } catch (const RDFException &e) {
+        // caught by reference so the exception object is not copied
+        std::cerr << "Caught RDF exception: " << e.what() << std::endl;
+    }
+
+    DEBUG << "Stored, now saving" << endl;
+
+    outstore->save("test-out.ttl");
+
+    DEBUG << "Saved" << endl;
+
+
+    // Human-readable summary, ordered by sort name
+    QMultiMap<QString, Composer *> cmap;
+    foreach (Composer *c, cset) {
+        QString n = c->getSortName(true);
+        cmap.insert(n, c);
+    }
+
+    std::cout << "Composers: " << cmap.size() << std::endl;
+
+    for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
+         i != cmap.end(); ++i) {
+
+        QString n = i.key();
+        Composer *c = i.value();
+
+        std::cout << n.toStdString();
+
+        QString d = c->getDisplayDates();
+        if (d != "") std::cout << " (" << d.toStdString() << ")";
+        std::cout << std::endl;
+    }
+
+    std::cout << std::endl;
+
+    std::cout << "Works by composer:" << std::endl;
+
+    for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
+         i != cmap.end(); ++i) {
+
+        QString n = i.key();
+        Composer *c = i.value();
+
+        // top-level works of this composer, in Work::Ordering order
+        std::set<Work *, Work::Ordering> wmap;
+        foreach (Work *w, works) {
+            Composition *cn = w->composition();
+            if (!cn) continue;
+            if (cn->composer() != c) continue;
+            if (w->partOf()) continue;
+            wmap.insert(w);
+        }
+
+        if (wmap.empty()) continue;
+
+        std::cout << n.toStdString() << std::endl;
+
+        foreach (Work *w, wmap) {
+            std::cout << " * ";
+            std::cout << w->name().toStdString();
+            if (w->catalogue() != "") {
+                std::cout << " [" << w->catalogue().toStdString() << "]";
+            }
+            if (w->opus() != "") {
+                std::cout << " [op. " << w->opus().toStdString() << "]";
+            }
+            std::cout << std::endl;
+            std::set<Work *, Work::Ordering> orderedParts;
+            foreach (Work *ww, w->parts()) {
+                orderedParts.insert(ww);
+            }
+            foreach (Work *ww, orderedParts) {
+                std::cout << "    ";
+                if (ww->number() != "") {
+                    std::cout << ww->number().toStdString() << ". ";
+                }
+                std::cout << ww->name().toStdString();
+                // only show a part's catalogue/opus where it differs
+                // from that of the work it belongs to
+                if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
+                    std::cout << " [" << ww->catalogue().toStdString() << "]";
+                }
+                if (ww->opus() != "" && ww->opus() != w->opus()) {
+                    std::cout << " [op. " << ww->opus().toStdString() << "]";
+                }
+                std::cout << std::endl;
+            }
+        }
+
+        std::cout << std::endl;
+    }
+
+    delete outstore;
+
+    DEBUG << "Done" << endl;
+
+
+}
+
+