Mercurial > hg > classical
diff import/Test.cpp @ 0:e8f4c2b55fd8 classical-rdf
* reorganise
author | Chris Cannam |
---|---|
date | Tue, 01 Dec 2009 17:50:41 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/Test.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,831 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "Objects.h" + +#include <dataquay/BasicStore.h> +#include <dataquay/RDFException.h> +#include <dataquay/objectmapper/ObjectMapper.h> +#include <dataquay/objectmapper/ObjectBuilder.h> +#include <dataquay/objectmapper/ContainerBuilder.h> + +#include "ImportClassicalComposersOrg.h" +#include "ImportClassicalDotNet.h" +#include "ImportWikipediaComposers.h" +#include "ImportWikipediaWorks.h" +#include "ImportWikipediaWorksK.h" +#include "ImportWikipediaWorksList.h" +#include "ImportHoboken.h" + +#include <dataquay/Debug.h> + +using namespace ClassicalData; +using namespace Dataquay; + +#include <iostream> +#include <set> + +typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers + +bool datesMatch(Composer *a, Composer *b) +{ + if (a->birth() && b->birth()) { + if (abs(a->birth()->year() - b->birth()->year()) > 1) { + if ((!a->birth()->approximate() && !b->birth()->approximate()) || + (abs(a->birth()->year() - b->birth()->year()) > 10)) { + return false; + } + } + } + if (a->death() && b->death()) { + if (abs(a->death()->year() - b->death()->year()) > 1) { + if ((!a->death()->approximate() && !b->death()->approximate()) || + (abs(a->death()->year() - b->death()->year()) > 10)) { + return false; + } + } + } + return true; +} + +void +addMiscExpansions(Composer *c) +{ + QString n = c->name(); + + DEBUG << "addMiscExpansions: n = " << n << endl; + + // lovely hard-coded special cases go here! some of these are + // needed for works->composer assignments + if (n == "Balakirev, Milii") { + c->addAlias("Mily Balakirev"); + } + if (n.startsWith("Cui, C")) { + c->addAlias(QString::fromUtf8("C\303\251sar Cui")); + } + if (n == "Handel, George Frideric") { + c->addAlias("Handel, Georg Friedrich"); + c->addAlias("Handel"); + } + if (n == "Mayr, Simon") { + c->addAlias("Mayr"); + } + + n.replace(", Sr.", " Sr."); + n.replace(", Jr.", " Jr."); + + int comma = n.indexOf(", "); + if (comma > 0 && comma + 2 < n.length()) { + + QString left = n.left(comma); + QString right = n.right(n.length() - comma - 2); + + QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$"); + if (jrsr.indexIn(right) >= 0) { + left = left + jrsr.cap(1); + right = right.left(right.length()-jrsr.matchedLength()); + } + n = right + " " + left; + } + + if (n != c->name()) c->addAlias(n); + + if (n.contains("Sergey")) { + QString nn(n); + nn.replace("Sergey", "Sergei"); + c->addAlias(nn); + } + + QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive); + if (sr.indexIn(n) >= 0) { + QString nr = n; + nr.replace(sr.pos(0), sr.matchedLength(), " I"); + nr.replace(" ", " "); + DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; + c->addAlias(nr); + } + QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive); + if (jr.indexIn(n) >= 0) { + QString nr = n; + nr.replace(jr.pos(0), jr.matchedLength(), " II"); + nr.replace(" ", " "); + DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; + c->addAlias(nr); + } + QString nr = n; + nr.replace("(I)", "I"); + nr.replace("(II)", "II"); + nr.replace("(III)", "III"); + c->addAlias(nr); +} + +bool namesFuzzyMatch(QString an, Composer *b) +{ + // ew! + + QString bn = b->name(); + if (bn == an) return true; + if (b->aliases().contains(an)) return true; + int aSurnameIndex = 0, bSurnameIndex = 0; + if (an.contains(",")) { + an.replace(",", ""); + } else { + aSurnameIndex = -1; + } + if (bn.contains(",")) { + bn.replace(",", ""); + } else { + bSurnameIndex = -1; + } + QStringList nl = an.split(QRegExp("[ -]")); + QStringList bnl = bn.split(QRegExp("[ -]")); + int matchCount = 0; + QString surnameMatch = ""; + if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; + if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; + if (nl[aSurnameIndex][0].isUpper() && + nl[aSurnameIndex] != "Della" && + nl[aSurnameIndex] == bnl[bSurnameIndex]) { + surnameMatch = nl[aSurnameIndex]; + } + foreach (QString elt, nl) { + if (!elt[0].isUpper() || elt == "Della") continue; + if (bnl.contains(elt)) { + ++matchCount; + continue; + } + } + if (matchCount > 1 && surnameMatch != "") { + DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; + return true; + } + return false; +} + +bool +hasBetterName(Composer *c, Composer *other) +{ + if (c->name() == other->name()) return false; + + // Try to guess which of c and other is more likely to have a good + // "canonical form" of the composer's name + + if (c->name().startsWith("van ")) { + return false; // wrong choice of sort for e.g. LvB; should be + // Beethoven, Ludwig van, not van Beethoven, Ludwig + } + if (other->name().startsWith("van ")) { + return true; + } + + if (c->aliases().size() != other->aliases().size()) { + // a rather weak heuristic + return c->aliases().size() > other->aliases().size(); + } + + if (c->name().contains(',') && !other->name().contains(',')) { + // another rather weak heuristic + return true; + } + + return false; +} + +void mergeComposer(Composer *c, ComposerMap &composers) +{ + QString name = c->name(); + + QSet<QString> allNames = c->aliases(); + allNames.insert(name); + + QString dates; + if (c->birth()) { + if (c->death()) { + dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year()); + } else { + dates = QString("%1-").arg(c->birth()->year()); + } + } + if (dates != "") { + allNames.insert(dates); + } + + QSet<Composer *> matches; + + foreach (QString candidateName, allNames) { + QString key = candidateName.toLower(); + if (composers.contains(key)) { + foreach (Composer *candidate, composers[key]) { + if (candidateName == dates) { + if (!namesFuzzyMatch(c->name(), candidate) && + !namesFuzzyMatch(candidate->name(), c)) { + DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; + continue; + } else { + DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; + } + } else { + if (!datesMatch(c, candidate)) { + DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; + continue; + } + } + matches.insert(candidate); + } + } + } + + if (matches.empty()) { + DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; + + if (!c->birth() && !c->death()) { + // laboriously look for fuzzy match across _all_ composers + for (ComposerMap::iterator i = composers.begin(); + i != composers.end(); ++i) { + foreach (Composer *candidate, *i) { + if (namesFuzzyMatch(c->name(), candidate)) { + DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; + matches.insert(candidate); + break; + } + } + if (!matches.empty()) break; + } + } + + if (matches.empty()) { + foreach (QString candidateName, allNames) { + composers[candidateName.toLower()].insert(c); + DEBUG << "added for alias or date " << candidateName << endl; + } + return; + } + } + + if (matches.size() > 1) { + DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl; + } + + Composer *other = *matches.begin(); + + DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl; + + if (hasBetterName(c, other)) { + other->addAlias(other->name()); + other->setName(c->name()); + } else { + other->addAlias(c->name()); + } + composers[c->name().toLower()].insert(other); + DEBUG << "linking from alias " << c->name() << endl; + + foreach (QString alias, c->aliases()) { + if (alias != other->name() && + !other->aliases().contains(alias)) { + other->addAlias(alias); + composers[alias.toLower()].insert(other); + DEBUG << "linking from alias " << alias << endl; + } + } + + foreach (Document *d, c->pages()) { + bool found = false; + foreach (Document *dd, other->pages()) { + if (d->uri() == dd->uri()) { + found = true; + break; + } + } + if (!found) { + d->setTopic(other); + other->addPage(d); + } + } + + //!!! actually the "approximate" bits of the following are bogus; + // a source reporting birth or death date as approx is probably + // more accurate than one reporting an exact date + + if (c->birth()) { + if (!other->birth() || other->birth()->approximate()) { + other->setBirth(c->birth()); + } + } + + if (c->death()) { + if (!other->death() || other->death()->approximate()) { + other->setDeath(c->death()); + } + } + + if (c->gender() != "") other->setGender(c->gender()); + if (c->nationality() != "") other->setNationality(c->nationality()); + if (c->remarks() != "") other->setRemarks(c->remarks()); + if (c->period() != "") other->setPeriod(c->period()); + +} + +QString +asciify(QString field) +{ + // accented characters etc -- add "ascii version" for dumb search purposes + QString ascii; + for (int i = 0; i < field.length(); ++i) { + QString dc = field[i].decomposition(); + if (dc != "") ascii += dc[0]; + else if (field[i] == QChar(0x00DF)) { + ascii += "ss"; + } else { + ascii += field[i]; + } + } + ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe + ascii.replace(QString::fromUtf8("\342\200\222"), "-"); + ascii.replace(QString::fromUtf8("\342\200\223"), "-"); + ascii.replace(QString::fromUtf8("\342\200\224"), "-"); + ascii.replace(QString::fromUtf8("\342\200\225"), "-"); + return ascii; +} + +void +asciify(Composer *c) +{ + QString n = c->name(); + QString asc = asciify(n); + if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc); + foreach (QString alias, c->aliases()) { + asc = asciify(alias); + if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc); + } +} + +void +asciify(Work *w) +{ + QString n = w->name(); + QString asc = asciify(n); + if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc); + foreach (QString alias, w->aliases()) { + asc = asciify(alias); + if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc); + } +} + +void +assignUri(Store *s, Composer *c) +{ + static QSet<QString> convSet; + QString conv = c->name(); + if (!conv.contains(",")) { + QStringList sl = conv.split(" "); + if (!sl.empty()) { + sl.push_front(sl[sl.size()-1]); + sl.removeLast(); + conv = sl.join(" "); + DEBUG << "assignUri: " << c->name() << " -> " << conv << endl; + } + } + conv = asciify(conv); + conv.replace(" ", "_"); + conv.replace("-", "_"); + conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); + conv = conv.toLower(); + QString initial = conv; + int i = 1; + while (convSet.contains(conv)) { + conv = QString("%1__%2").arg(initial).arg(i); + i++; + } + convSet.insert(conv); + c->setProperty("uri", s->expand(":composer_" + conv)); +} + +void +assignUri(Store *s, Work *w, Composer *c) +{ + QString pfx = c->property("uri").toUrl().toString(); + DEBUG << "pfx = " << pfx << endl; + if (!pfx.contains("composer_")) pfx = ""; + else pfx.replace(QRegExp("^.*composer_"), ""); + + static QSet<QString> convSet; + QString conv = w->catalogue(); + if (conv == "") conv = w->opus(); + conv = conv.replace(".", ""); + bool hasOpus = (conv != ""); + if (conv == "") conv = w->name(); + if (w->number() != "") conv = conv + "_no" + w->number(); + if (pfx != "") conv = pfx + "_" + conv; + conv = asciify(conv); + conv.replace(" ", "_"); + conv.replace("-", "_"); + conv.replace(":", "_"); + conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); + conv = conv.toLower(); + // I think actually for works we want to merge duplicates rather than + // assign them separate URIs, _unless_ they lack a viable opus number + if (!hasOpus) { + QString initial = conv; + int i = 1; + while (convSet.contains(conv)) { + conv = QString("%1__%2").arg(initial).arg(i); + i++; + } + } + convSet.insert(conv); + w->setProperty("uri", s->expand(":work_" + conv)); +} + +void +addDbpediaResource(Store *store, QObject *o, QString s) +{ + QUrl u = o->property("uri").toUrl(); + if (u == QUrl()) return; + if (s.startsWith("http://en.wikipedia.org/wiki/")) { + store->add(Triple(u, + "mo:wikipedia", + QUrl(s))); + s.replace("http://en.wikipedia.org/wiki/", + "http://dbpedia.org/resource/"); + store->add(Triple(u, + "owl:sameAs", + QUrl(s))); + } +} + +int main(int argc, char **argv) +{ + qRegisterMetaType<HistoricalEvent *> + ("ClassicalData::HistoricalEvent*"); + qRegisterMetaType<Birth *> + ("ClassicalData::Birth*"); + qRegisterMetaType<Death *> + ("ClassicalData::Death*"); + qRegisterMetaType<Composition *> + ("ClassicalData::Composition*"); + qRegisterMetaType<Work *> + ("ClassicalData::Work*"); + qRegisterMetaType<Movement *> + ("ClassicalData::Movement*"); + qRegisterMetaType<Composer *> + ("ClassicalData::Composer*"); + qRegisterMetaType<Document *> + ("ClassicalData::Document*"); + qRegisterMetaType<Form *> + ("ClassicalData::Form*"); + qRegisterMetaType<QSet<Work *> > + ("QSet<ClassicalData::Work*>"); + qRegisterMetaType<QSet<Movement *> > + ("QSet<ClassicalData::Movement*>"); + qRegisterMetaType<QSet<Document *> > + ("QSet<ClassicalData::Document*>"); + qRegisterMetaType<QSet<Form *> > + ("QSet<ClassicalData::Form*>"); + qRegisterMetaType<QSet<QString> > + ("QSet<QString>"); + + qRegisterMetaType<ClassicalComposersOrgImporter *> + ("ClassicalData::ClassicalComposersOrgImporter*"); + qRegisterMetaType<ClassicalDotNetImporter *> + ("ClassicalData::ClassicalDotNetImporter*"); + qRegisterMetaType<WikipediaComposersImporter *> + ("ClassicalData::WikipediaComposersImporter*"); + qRegisterMetaType<WikipediaWorksImporter *> + ("ClassicalData::WikipediaWorksImporter*"); + qRegisterMetaType<WikipediaWorksKImporter *> + ("ClassicalData::WikipediaWorksKImporter*"); + qRegisterMetaType<WikipediaWorksListImporter *> + ("ClassicalData::WikipediaWorksListImporter*"); + qRegisterMetaType<HobokenImporter *> + ("ClassicalData::HobokenImporter*"); + + ObjectBuilder::getInstance()->registerClass + <HistoricalEvent>("ClassicalData::HistoricalEvent*"); + ObjectBuilder::getInstance()->registerClass + <Birth>("ClassicalData::Birth*"); + ObjectBuilder::getInstance()->registerClass + <Death>("ClassicalData::Death*"); + ObjectBuilder::getInstance()->registerClass + <Composition>("ClassicalData::Composition*"); + ObjectBuilder::getInstance()->registerClass + <Work, QObject>("ClassicalData::Work*"); + ObjectBuilder::getInstance()->registerClass + <Movement, QObject>("ClassicalData::Movement*"); + ObjectBuilder::getInstance()->registerClass + <Composer, QObject>("ClassicalData::Composer*"); + ObjectBuilder::getInstance()->registerClass + <Document, QObject>("ClassicalData::Document*"); + ObjectBuilder::getInstance()->registerClass + <Form, QObject>("ClassicalData::Form*"); + + ObjectBuilder::getInstance()->registerClass + <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*"); + ObjectBuilder::getInstance()->registerClass + <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*"); + ObjectBuilder::getInstance()->registerClass + <HobokenImporter>("ClassicalData::HobokenImporter*"); + + ContainerBuilder::getInstance()->registerContainer + <QString, QSet<QString> > + ("QString", "QSet<QString>", ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Work*, QSet<Work*> > + ("ClassicalData::Work*", "QSet<ClassicalData::Work*>", + ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Movement*, QSet<Movement*> > + ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>", + ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Document*, QSet<Document*> > + ("ClassicalData::Document*", "QSet<ClassicalData::Document*>", + ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Form*, QSet<Form*> > + ("ClassicalData::Form*", "QSet<ClassicalData::Form*>", + ContainerBuilder::SetKind); + + BasicStore *store = BasicStore::load("file:importers.ttl"); + ObjectMapper mapper(store); + QObject *parentObject = mapper.loadAllObjects(new QObject()); + + BasicStore *outstore = new BasicStore(); + ObjectMapper outmapper(outstore); + + outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged); + + outstore->addPrefix("type", outmapper.getObjectTypePrefix()); + outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "ClassicalData/"); + outstore->addPrefix("property", outmapper.getPropertyPrefix()); + outstore->addPrefix("rel", outmapper.getRelationshipPrefix()); + outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/"); + outstore->addPrefix("mo", "http://purl.org/ontology/mo/"); + outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/"); + outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/"); + outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#"); + outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); + + outmapper.addPropertyMapping("ClassicalData::Composer", "pages", + outstore->expand("foaf:page")); + outmapper.addPropertyMapping("ClassicalData::Composer", "name", + outstore->expand("foaf:name")); + outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", + outstore->expand("property:also_known_as")); + outmapper.addPropertyMapping("ClassicalData::Document", "topic", + outstore->expand("foaf:primaryTopic")); + + outmapper.addTypeMapping("ClassicalData::Work", + outstore->expand("mo:MusicalWork")); + outmapper.addPropertyMapping("ClassicalData::Work", "composition", + outstore->expand("mo:composed_in")); + outmapper.addPropertyMapping("ClassicalData::Work", "opus", + outstore->expand("mo:opus")); + outmapper.addPropertyMapping("ClassicalData::Work", "k6", + outstore->expand("mo:k6")); + outmapper.addPropertyMapping("ClassicalData::Work", "bwv", + outstore->expand("mo:bwv")); + outmapper.addPropertyMapping("ClassicalData::Work", "number", + outstore->expand("mo:number")); + outmapper.addPropertyMapping("ClassicalData::Work", "partOf", + outstore->expand("dc:isPartOf")); + outmapper.addPropertyMapping("ClassicalData::Work", "parts", + outstore->expand("dc:hasPart")); + outmapper.addPropertyMapping("ClassicalData::Work", "pages", + outstore->expand("foaf:page")); + outmapper.addPropertyMapping("ClassicalData::Work", "forms", + outstore->expand("property:form")); + outmapper.addPropertyMapping("ClassicalData::Work", "key", + outstore->expand("mo:key")); + outmapper.addPropertyMapping("ClassicalData::Work", "aliases", + outstore->expand("property:also_known_as")); + outmapper.addPropertyMapping("ClassicalData::Work", "name", + outstore->expand("dc:title")); + + outmapper.addTypeMapping("ClassicalData::Composition", + outstore->expand("mo:Composition")); + outmapper.addPropertyMapping("ClassicalData::Composition", "composer", + outstore->expand("mo:composer")); + outmapper.addPropertyMapping("ClassicalData::Composition", "works", + outstore->expand("mo:produced_work")); + + outstore->add(Triple("classical:Composer", "a", + outstore->expand("owl:Class"))); + outstore->add(Triple("classical:Composer", "rdfs:subClassOf", + outstore->expand("mo:MusicArtist"))); + + QList<Importer *> importers = parentObject->findChildren<Importer *>(); + std::cerr << "have " << importers.size() << " importers" << std::endl; + + ComposerMap composers; + + QList<Composer *> dated; + QList<Composer *> undated; + + QList<Work *> works; + QList<Composition *> compositions; + QList<QObject *> other; + + foreach (Importer *importer, importers) { + QObjectList objects = importer->getImportedObjects(); + foreach (QObject *o, objects) { + Composer *c; + if ((c = qobject_cast<Composer *>(o))) { + addMiscExpansions(c); + asciify(c); + if (c->birth() || c->death()) dated.push_back(c); + else undated.push_back(c); + continue; + } + Work *w; + if ((w = qobject_cast<Work *>(o))) { + asciify(w); + works.push_back(w); + continue; + } + Composition *cn; + if ((cn = qobject_cast<Composition *>(o))) { + compositions.push_back(cn); + continue; + } + } + } + + // get all the dated composers merged before attempting to match + // the undated ones + foreach (Composer *c, dated) { + mergeComposer(c, composers); + } + foreach (Composer *c, undated) { + mergeComposer(c, composers); + } + + QObjectList toStore; + + QSet<Composer *> cset; + for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) { + foreach (Composer *c, i.value()) { + if (!cset.contains(c)) { + assignUri(outstore, c); + toStore.push_back(c); + cset.insert(c); + } + foreach (Document *d, c->pages()) { + QString s = d->uri().toString(); + addDbpediaResource(outstore, c, s); + } + } + } + + QSet<QString> storedUris; + + foreach (Work *w, works) { + Composition *cn = w->composition(); + if (!cn) continue; + if (!cn->composer()) { + QString cname = cn->composerName(); + if (cname != "") { + if (!composers.contains(cname.toLower())) { + DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; + } else { + QSet<Composer *> cs = composers[cname.toLower()]; + if (cs.empty()) { + DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; + } else if (cs.size() > 1) { + DEBUG << "Failed to assign Composition to composer: " + << cs.size() << " composers match name " << cname << endl; + } else { + cn->setComposer(*cs.begin()); + } + } + } else { + DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl; + } + } + + if (cn->composer()) { + assignUri(outstore, w, cn->composer()); + } + + foreach (Document *d, w->pages()) { + QString s = d->uri().toString(); + addDbpediaResource(outstore, w, s); + toStore.push_back(d); + } + + QString u = w->property("uri").toUrl().toString(); + if (u == "" || !storedUris.contains(u)) { + toStore.push_back(w); + if (u != "") storedUris.insert(u); + } + } + + try { + outmapper.storeAllObjects(toStore); + + } catch (RDFException e) { + std::cerr << "Caught RDF exception: " << e.what() << std::endl; + } + + DEBUG << "Stored, now saving" << endl; + + outstore->save("test-out.ttl"); + + DEBUG << "Saved" << endl; + + + QMultiMap<QString, Composer *> cmap; + foreach (Composer *c, cset) { + QString n = c->getSortName(true); + cmap.insert(n, c); + } + + std::cout << "Composers: " << cmap.size() << std::endl; + + for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); + i != cmap.end(); ++i) { + + QString n = i.key(); + Composer *c = i.value(); + + std::cout << n.toStdString(); + + QString d = c->getDisplayDates(); + if (d != "") std::cout << " (" << d.toStdString() << ")"; + std::cout << std::endl; + } + + std::cout << std::endl; + + std::cout << "Works by composer:" << std::endl; + + for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); + i != cmap.end(); ++i) { + + QString n = i.key(); + Composer *c = i.value(); + + std::set<Work *, Work::Ordering> wmap; + foreach (Work *w, works) { + Composition *cn = w->composition(); + if (!cn) continue; + if (cn->composer() != c) continue; + if (w->partOf()) continue; + wmap.insert(w); + } + + if (wmap.empty()) continue; + + std::cout << n.toStdString() << std::endl; + + foreach (Work *w, wmap) { + std::cout << " * "; + std::cout << w->name().toStdString(); + if (w->catalogue() != "") { + std::cout << " [" << w->catalogue().toStdString() << "]"; + } + if (w->opus() != "") { + std::cout << " [op. " << w->opus().toStdString() << "]"; + } + std::cout << std::endl; + std::set<Work *, Work::Ordering> orderedParts; + foreach (Work *ww, w->parts()) { + orderedParts.insert(ww); + } + foreach (Work *ww, orderedParts) { + std::cout << " "; + if (ww->number() != "") { + std::cout << ww->number().toStdString() << ". "; + } + std::cout << ww->name().toStdString(); + if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) { + std::cout << " [" << ww->catalogue().toStdString() << "]"; + } + if (ww->opus() != "" && ww->opus() != w->opus()) { + std::cout << " [op. " << ww->opus().toStdString() << "]"; + } + std::cout << std::endl; + } + } + + std::cout << std::endl; + } + + delete outstore; + + DEBUG << "Done" << endl; + + +} + +