Chris@0: /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ Chris@0: Chris@0: #include "Objects.h" Chris@0: Chris@0: #include Chris@0: #include Chris@0: #include Chris@0: #include Chris@0: #include Chris@0: Chris@0: #include "ImportClassicalComposersOrg.h" Chris@0: #include "ImportClassicalDotNet.h" Chris@4: #include "ImportClassicalArchives.h" Chris@0: #include "ImportWikipediaComposers.h" Chris@0: #include "ImportWikipediaWorks.h" Chris@0: #include "ImportWikipediaWorksK.h" Chris@0: #include "ImportWikipediaWorksList.h" Chris@0: #include "ImportHoboken.h" Chris@0: Chris@0: #include Chris@0: Chris@0: using namespace ClassicalData; Chris@0: using namespace Dataquay; Chris@0: Chris@0: #include Chris@0: #include Chris@0: Chris@0: typedef QMap > ComposerMap; // name -> composers Chris@0: Chris@0: void Chris@0: addMiscExpansions(Composer *c) Chris@0: { Chris@0: QString n = c->name(); Chris@0: Chris@0: DEBUG << "addMiscExpansions: n = " << n << endl; Chris@0: Chris@0: // lovely hard-coded special cases go here! some of these are Chris@0: // needed for works->composer assignments Chris@0: if (n == "Balakirev, Milii") { Chris@0: c->addAlias("Mily Balakirev"); Chris@0: } Chris@0: if (n.startsWith("Cui, C")) { Chris@0: c->addAlias(QString::fromUtf8("C\303\251sar Cui")); Chris@0: } Chris@0: if (n == "Handel, George Frideric") { Chris@0: c->addAlias("Handel, Georg Friedrich"); Chris@0: c->addAlias("Handel"); Chris@0: } Chris@1: if (n == "Prokofiev, Sergey") { Chris@1: c->addAlias("Prokofieff, Sergei"); Chris@1: c->addAlias("Sergei Prokofieff"); Chris@1: } Chris@1: if (n == "Rossini, Gioacchino") { Chris@1: c->addAlias("Rossini, Gioachino"); Chris@1: c->addAlias("Gioachino Rossini"); Chris@1: } Chris@1: if (n == "Edwards, Richard") { Chris@1: c->addAlias("Edwardes, Richard"); Chris@1: c->addAlias("Richard Edwardes"); Chris@1: c->addAlias("Richard Edwards"); Chris@1: } Chris@1: if (n == "Rimsky-Korsakov, Nikolay Andreyevich") { Chris@1: c->addAlias("Nikolai Rimsky-Korsakov"); Chris@1: } Chris@1: if (n.startsWith("Piccinni, Nico")) { Chris@1: c->addAlias(n); Chris@1: c->setName(QString::fromUtf8("Piccinni, Niccol\303\262")); Chris@1: } Chris@1: if (n == "Tchaikovsky, Pyotr Ilyich") { Chris@1: c->addAlias("Tchaikovsky, Piotr Ilyitch"); Chris@1: } Chris@1: if (n == "Wilhelm Stenhammar") { Chris@1: c->addAlias("Stenhammar, Vilhelm Eugene"); Chris@1: c->setName("Stenhammar, Wilhelm"); Chris@1: c->addAlias(n); Chris@1: } Chris@1: if (n == "Mercadante, Saverio Rafaele") { Chris@1: c->addAlias("Mercadante, Giuseppe"); Chris@1: } Chris@1: if (n == "Johann Wenzel Anton Stamitz") { Chris@1: c->addAlias(n); Chris@1: c->setName("Stamitz, Johann Wenzel Anton"); Chris@1: c->addAlias("Stamitz, Jan Vaclav"); Chris@1: } Chris@1: if (n == "Mario Castelnuovo-Tedesco") { Chris@1: c->addAlias("Castelnuovo Tedesco, Mario"); Chris@1: } Chris@0: if (n == "Mayr, Simon") { Chris@0: c->addAlias("Mayr"); Chris@0: } Chris@0: Chris@0: n.replace(", Sr.", " Sr."); Chris@0: n.replace(", Jr.", " Jr."); Chris@0: Chris@0: int comma = n.indexOf(", "); Chris@0: if (comma > 0 && comma + 2 < n.length()) { Chris@0: Chris@0: QString left = n.left(comma); Chris@0: QString right = n.right(n.length() - comma - 2); Chris@0: Chris@0: QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$"); Chris@0: if (jrsr.indexIn(right) >= 0) { Chris@0: left = left + jrsr.cap(1); Chris@0: right = right.left(right.length()-jrsr.matchedLength()); Chris@0: } Chris@0: n = right + " " + left; Chris@0: } Chris@0: Chris@0: if (n != c->name()) c->addAlias(n); Chris@0: Chris@0: if (n.contains("Sergey")) { Chris@0: QString nn(n); Chris@0: nn.replace("Sergey", "Sergei"); Chris@0: c->addAlias(nn); Chris@1: } else if (n.contains("Sergei")) { Chris@1: QString nn(n); Chris@1: nn.replace("Sergei", "Sergey"); Chris@1: c->addAlias(nn); Chris@0: } Chris@0: Chris@0: QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive); Chris@0: if (sr.indexIn(n) >= 0) { Chris@0: QString nr = n; Chris@0: nr.replace(sr.pos(0), sr.matchedLength(), " I"); Chris@0: nr.replace(" ", " "); Chris@0: DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; Chris@0: c->addAlias(nr); Chris@0: } Chris@0: QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive); Chris@0: if (jr.indexIn(n) >= 0) { Chris@0: QString nr = n; Chris@0: nr.replace(jr.pos(0), jr.matchedLength(), " II"); Chris@0: nr.replace(" ", " "); Chris@0: DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; Chris@0: c->addAlias(nr); Chris@0: } Chris@0: QString nr = n; Chris@0: nr.replace("(I)", "I"); Chris@0: nr.replace("(II)", "II"); Chris@0: nr.replace("(III)", "III"); Chris@0: c->addAlias(nr); Chris@0: } Chris@0: Chris@5: QString makeNameKey(QString name) Chris@5: { Chris@5: QString key = name.toLower() Chris@5: .replace("'", "") Chris@5: .replace("x", "ks") Chris@5: .replace("y", "i") Chris@5: .replace("k", "c") Chris@5: .replace("ch", "c") Chris@5: .replace("cc", "c") Chris@5: .replace("v", "f") Chris@5: .replace("ff", "f") Chris@5: .replace("th", "t") Chris@5: .replace("tch", "ch") Chris@5: .replace("er", "r"); Chris@5: // DEBUG << "makeNameKey(" << name << "): " << key << endl; Chris@5: return key; Chris@5: } Chris@5: Chris@0: bool namesFuzzyMatch(QString an, Composer *b) Chris@0: { Chris@0: // ew! Chris@0: Chris@0: QString bn = b->name(); Chris@0: if (bn == an) return true; Chris@0: if (b->aliases().contains(an)) return true; Chris@0: int aSurnameIndex = 0, bSurnameIndex = 0; Chris@0: if (an.contains(",")) { Chris@0: an.replace(",", ""); Chris@0: } else { Chris@0: aSurnameIndex = -1; Chris@0: } Chris@0: if (bn.contains(",")) { Chris@0: bn.replace(",", ""); Chris@0: } else { Chris@0: bSurnameIndex = -1; Chris@0: } Chris@0: QStringList nl = an.split(QRegExp("[ -]")); Chris@5: QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]")); Chris@0: int matchCount = 0; Chris@0: QString surnameMatch = ""; Chris@0: if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; Chris@0: if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; Chris@0: if (nl[aSurnameIndex][0].isUpper() && Chris@0: nl[aSurnameIndex] != "Della" && Chris@5: makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) { Chris@0: surnameMatch = nl[aSurnameIndex]; Chris@0: } Chris@5: int tested = 0; Chris@0: foreach (QString elt, nl) { Chris@0: if (!elt[0].isUpper() || elt == "Della") continue; Chris@5: QString k = makeNameKey(elt); Chris@5: if (bnl.contains(k)) { Chris@0: ++matchCount; Chris@5: } Chris@5: if (++tested == 2 && matchCount == 0) { Chris@5: return false; Chris@0: } Chris@0: } Chris@5: if (surnameMatch != "") { Chris@0: DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; Chris@5: if (matchCount > 1) { Chris@5: return true; Chris@5: } else { Chris@5: DEBUG << "(but not enough else matched)" << endl; Chris@5: return false; Chris@5: } Chris@0: } Chris@0: return false; Chris@0: } Chris@0: Chris@0: bool Chris@0: hasBetterName(Composer *c, Composer *other) Chris@0: { Chris@0: if (c->name() == other->name()) return false; Chris@0: Chris@0: // Try to guess which of c and other is more likely to have a good Chris@0: // "canonical form" of the composer's name Chris@0: Chris@0: if (c->name().startsWith("van ")) { Chris@0: return false; // wrong choice of sort for e.g. LvB; should be Chris@0: // Beethoven, Ludwig van, not van Beethoven, Ludwig Chris@0: } Chris@0: if (other->name().startsWith("van ")) { Chris@0: return true; Chris@0: } Chris@0: Chris@0: if (c->aliases().size() != other->aliases().size()) { Chris@0: // a rather weak heuristic Chris@0: return c->aliases().size() > other->aliases().size(); Chris@0: } Chris@0: Chris@0: if (c->name().contains(',') && !other->name().contains(',')) { Chris@0: // another rather weak heuristic Chris@0: return true; Chris@0: } Chris@0: Chris@0: return false; Chris@0: } Chris@0: Chris@0: void mergeComposer(Composer *c, ComposerMap &composers) Chris@0: { Chris@0: QString name = c->name(); Chris@0: Chris@0: QSet allNames = c->aliases(); Chris@0: allNames.insert(name); Chris@0: Chris@0: QString dates; Chris@0: if (c->birth()) { Chris@0: if (c->death()) { Chris@0: dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year()); Chris@0: } else { Chris@0: dates = QString("%1-").arg(c->birth()->year()); Chris@0: } Chris@0: } Chris@0: if (dates != "") { Chris@0: allNames.insert(dates); Chris@0: } Chris@0: Chris@0: QSet matches; Chris@0: Chris@0: foreach (QString candidateName, allNames) { Chris@5: QString key = makeNameKey(candidateName); Chris@0: if (composers.contains(key)) { Chris@0: foreach (Composer *candidate, composers[key]) { Chris@0: if (candidateName == dates) { Chris@5: if (c->name() == candidate->name()) { Chris@5: DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl; Chris@5: } else if (!namesFuzzyMatch(c->name(), candidate) && Chris@5: !namesFuzzyMatch(candidate->name(), c)) { Chris@0: DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; Chris@0: continue; Chris@0: } else { Chris@0: DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; Chris@0: } Chris@0: } else { Chris@1: if (!c->datesMatch(candidate)) { Chris@0: DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; Chris@0: continue; Chris@0: } Chris@0: } Chris@0: matches.insert(candidate); Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: if (matches.empty()) { Chris@0: DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; Chris@0: Chris@0: if (!c->birth() && !c->death()) { Chris@5: DEBUG << "Composer has no dates, laboriously searching for all names" << endl; Chris@0: // laboriously look for fuzzy match across _all_ composers Chris@0: for (ComposerMap::iterator i = composers.begin(); Chris@0: i != composers.end(); ++i) { Chris@0: foreach (Composer *candidate, *i) { Chris@0: if (namesFuzzyMatch(c->name(), candidate)) { Chris@0: DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; Chris@0: matches.insert(candidate); Chris@0: break; Chris@0: } Chris@0: } Chris@0: if (!matches.empty()) break; Chris@0: } Chris@0: } Chris@0: Chris@0: if (matches.empty()) { Chris@0: foreach (QString candidateName, allNames) { Chris@5: QString key = makeNameKey(candidateName); Chris@5: composers[key].insert(c); Chris@0: DEBUG << "added for alias or date " << candidateName << endl; Chris@0: } Chris@0: return; Chris@0: } Chris@0: } Chris@0: Chris@0: if (matches.size() > 1) { Chris@0: DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl; Chris@0: } Chris@0: Chris@0: Composer *other = *matches.begin(); Chris@0: Chris@0: DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl; Chris@0: Chris@0: if (hasBetterName(c, other)) { Chris@0: other->addAlias(other->name()); Chris@0: other->setName(c->name()); Chris@0: } else { Chris@0: other->addAlias(c->name()); Chris@0: } Chris@5: composers[makeNameKey(c->name())].insert(other); Chris@0: DEBUG << "linking from alias " << c->name() << endl; Chris@0: Chris@0: foreach (QString alias, c->aliases()) { Chris@0: if (alias != other->name() && Chris@0: !other->aliases().contains(alias)) { Chris@0: other->addAlias(alias); Chris@5: composers[makeNameKey(alias)].insert(other); Chris@0: DEBUG << "linking from alias " << alias << endl; Chris@0: } Chris@0: } Chris@0: Chris@0: foreach (Document *d, c->pages()) { Chris@0: bool found = false; Chris@0: foreach (Document *dd, other->pages()) { Chris@0: if (d->uri() == dd->uri()) { Chris@0: found = true; Chris@0: break; Chris@0: } Chris@0: } Chris@0: if (!found) { Chris@0: d->setTopic(other); Chris@0: other->addPage(d); Chris@0: } Chris@0: } Chris@0: Chris@0: //!!! actually the "approximate" bits of the following are bogus; Chris@0: // a source reporting birth or death date as approx is probably Chris@0: // more accurate than one reporting an exact date Chris@0: Chris@0: if (c->birth()) { Chris@0: if (!other->birth() || other->birth()->approximate()) { Chris@0: other->setBirth(c->birth()); Chris@0: } Chris@0: } Chris@0: Chris@0: if (c->death()) { Chris@0: if (!other->death() || other->death()->approximate()) { Chris@0: other->setDeath(c->death()); Chris@0: } Chris@0: } Chris@0: Chris@0: if (c->gender() != "") other->setGender(c->gender()); Chris@4: Chris@4: foreach (QString s, c->nationality()) { Chris@4: other->addNationality(s); Chris@4: } Chris@4: Chris@4: foreach (QUrl s, c->geonameURIs()) { Chris@4: other->addGeonameURI(s); Chris@4: } Chris@4: Chris@0: if (c->remarks() != "") other->setRemarks(c->remarks()); Chris@0: if (c->period() != "") other->setPeriod(c->period()); Chris@0: Chris@0: } Chris@0: Chris@0: QString Chris@0: asciify(QString field) Chris@0: { Chris@0: // accented characters etc -- add "ascii version" for dumb search purposes Chris@0: QString ascii; Chris@0: for (int i = 0; i < field.length(); ++i) { Chris@0: QString dc = field[i].decomposition(); Chris@0: if (dc != "") ascii += dc[0]; Chris@0: else if (field[i] == QChar(0x00DF)) { Chris@0: ascii += "ss"; Chris@0: } else { Chris@0: ascii += field[i]; Chris@0: } Chris@0: } Chris@0: ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe Chris@0: ascii.replace(QString::fromUtf8("\342\200\222"), "-"); Chris@0: ascii.replace(QString::fromUtf8("\342\200\223"), "-"); Chris@0: ascii.replace(QString::fromUtf8("\342\200\224"), "-"); Chris@0: ascii.replace(QString::fromUtf8("\342\200\225"), "-"); Chris@0: return ascii; Chris@0: } Chris@0: Chris@0: void Chris@0: asciify(Composer *c) Chris@0: { Chris@0: QString n = c->name(); Chris@0: QString asc = asciify(n); Chris@0: if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc); Chris@0: foreach (QString alias, c->aliases()) { Chris@0: asc = asciify(alias); Chris@0: if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc); Chris@0: } Chris@0: } Chris@0: Chris@0: void Chris@0: asciify(Work *w) Chris@0: { Chris@0: QString n = w->name(); Chris@0: QString asc = asciify(n); Chris@0: if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc); Chris@0: foreach (QString alias, w->aliases()) { Chris@0: asc = asciify(alias); Chris@0: if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc); Chris@0: } Chris@0: } Chris@0: Chris@0: void Chris@0: assignUri(Store *s, Composer *c) Chris@0: { Chris@0: static QSet convSet; Chris@0: QString conv = c->name(); Chris@0: if (!conv.contains(",")) { Chris@0: QStringList sl = conv.split(" "); Chris@0: if (!sl.empty()) { Chris@0: sl.push_front(sl[sl.size()-1]); Chris@0: sl.removeLast(); Chris@0: conv = sl.join(" "); Chris@0: DEBUG << "assignUri: " << c->name() << " -> " << conv << endl; Chris@0: } Chris@0: } Chris@0: conv = asciify(conv); Chris@0: conv.replace(" ", "_"); Chris@0: conv.replace("-", "_"); Chris@0: conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); Chris@0: conv = conv.toLower(); Chris@0: QString initial = conv; Chris@1: int i = 2; Chris@0: while (convSet.contains(conv)) { Chris@0: conv = QString("%1__%2").arg(initial).arg(i); Chris@0: i++; Chris@0: } Chris@0: convSet.insert(conv); Chris@1: c->setProperty("uri", s->expand(":composer/" + conv)); Chris@0: } Chris@0: Chris@0: void Chris@0: assignUri(Store *s, Work *w, Composer *c) Chris@0: { Chris@0: QString pfx = c->property("uri").toUrl().toString(); Chris@0: DEBUG << "pfx = " << pfx << endl; Chris@2: if (!pfx.contains("composer/")) pfx = ":work/"; Chris@2: else { Chris@2: pfx.replace("composer/", "work/"); Chris@2: pfx += "/"; Chris@2: } Chris@0: Chris@0: static QSet convSet; Chris@1: Chris@0: QString conv = w->catalogue(); Chris@0: if (conv == "") conv = w->opus(); Chris@0: conv = conv.replace(".", ""); Chris@0: bool hasOpus = (conv != ""); Chris@1: if (conv == "") conv = w->name().toLower(); Chris@0: if (w->number() != "") conv = conv + "_no" + w->number(); Chris@0: conv = asciify(conv); Chris@0: conv.replace(" ", "_"); Chris@0: conv.replace("-", "_"); Chris@0: conv.replace(":", "_"); Chris@0: conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); Chris@1: Chris@2: if (pfx != "") conv = pfx + conv; Chris@1: Chris@0: // I think actually for works we want to merge duplicates rather than Chris@0: // assign them separate URIs, _unless_ they lack a viable opus number Chris@0: if (!hasOpus) { Chris@0: QString initial = conv; Chris@1: int i = 2; Chris@0: while (convSet.contains(conv)) { Chris@0: conv = QString("%1__%2").arg(initial).arg(i); Chris@0: i++; Chris@0: } Chris@0: } Chris@0: convSet.insert(conv); Chris@1: Chris@1: w->setProperty("uri", conv); Chris@0: } Chris@0: Chris@0: void Chris@0: addDbpediaResource(Store *store, QObject *o, QString s) Chris@0: { Chris@0: QUrl u = o->property("uri").toUrl(); Chris@0: if (u == QUrl()) return; Chris@0: if (s.startsWith("http://en.wikipedia.org/wiki/")) { Chris@0: store->add(Triple(u, Chris@0: "mo:wikipedia", Chris@0: QUrl(s))); Chris@0: s.replace("http://en.wikipedia.org/wiki/", Chris@0: "http://dbpedia.org/resource/"); Chris@0: store->add(Triple(u, Chris@0: "owl:sameAs", Chris@0: QUrl(s))); Chris@0: } Chris@0: } Chris@0: Chris@0: int main(int argc, char **argv) Chris@0: { Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::HistoricalEvent*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Birth*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Death*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Composition*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Work*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Movement*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Composer*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::Document*"); Chris@0: qRegisterMetaType
Chris@0: ("ClassicalData::Form*"); Chris@0: qRegisterMetaType > Chris@0: ("QSet"); Chris@0: qRegisterMetaType > Chris@0: ("QSet"); Chris@0: qRegisterMetaType > Chris@0: ("QSet"); Chris@0: qRegisterMetaType > Chris@0: ("QSet"); Chris@0: qRegisterMetaType > Chris@0: ("QSet"); Chris@4: qRegisterMetaType > Chris@4: ("QSet"); Chris@0: Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::ClassicalComposersOrgImporter*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::ClassicalDotNetImporter*"); Chris@4: qRegisterMetaType Chris@4: ("ClassicalData::ClassicalArchivesImporter*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::WikipediaComposersImporter*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::WikipediaWorksImporter*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::WikipediaWorksKImporter*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::WikipediaWorksListImporter*"); Chris@0: qRegisterMetaType Chris@0: ("ClassicalData::HobokenImporter*"); Chris@0: Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::HistoricalEvent*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Birth*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Death*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Composition*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Work*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Movement*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Composer*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Document*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::Form*"); Chris@0: Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::ClassicalComposersOrgImporter*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::ClassicalDotNetImporter*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@4: ("ClassicalData::ClassicalArchivesImporter*"); Chris@4: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::WikipediaComposersImporter*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::WikipediaWorksImporter*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::WikipediaWorksKImporter*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::WikipediaWorksListImporter*"); Chris@0: ObjectBuilder::getInstance()->registerClass Chris@0: ("ClassicalData::HobokenImporter*"); Chris@0: Chris@0: ContainerBuilder::getInstance()->registerContainer Chris@0: > Chris@0: ("QString", "QSet", ContainerBuilder::SetKind); Chris@0: Chris@0: ContainerBuilder::getInstance()->registerContainer Chris@4: > Chris@4: ("QUrl", "QSet", ContainerBuilder::SetKind); Chris@4: Chris@4: ContainerBuilder::getInstance()->registerContainer Chris@0: > Chris@0: ("ClassicalData::Work*", "QSet", Chris@0: ContainerBuilder::SetKind); Chris@0: Chris@0: ContainerBuilder::getInstance()->registerContainer Chris@0: > Chris@0: ("ClassicalData::Movement*", "QSet", Chris@0: ContainerBuilder::SetKind); Chris@0: Chris@0: ContainerBuilder::getInstance()->registerContainer Chris@0: > Chris@0: ("ClassicalData::Document*", "QSet", Chris@0: ContainerBuilder::SetKind); Chris@0: Chris@0: ContainerBuilder::getInstance()->registerContainer Chris@0: > Chris@0: ("ClassicalData::Form*", "QSet", Chris@0: ContainerBuilder::SetKind); Chris@0: Chris@0: BasicStore *store = BasicStore::load("file:importers.ttl"); Chris@0: ObjectMapper mapper(store); Chris@0: QObject *parentObject = mapper.loadAllObjects(new QObject()); Chris@0: Chris@0: BasicStore *outstore = new BasicStore(); Chris@1: outstore->setBaseUri("http://dbtune.org/classical/resource/"); Chris@0: ObjectMapper outmapper(outstore); Chris@0: Chris@0: outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged); Chris@3: outmapper.setObjectStorePolicy(ObjectMapper::StoreAllObjects); Chris@3: outmapper.setBlankNodePolicy(ObjectMapper::NoBlankNodes); Chris@1: Chris@1: outmapper.setObjectTypePrefix("http://dbtune.org/classical/resource/"); Chris@1: outmapper.setPropertyPrefix("http://dbtune.org/classical/resource/vocab/"); Chris@1: outmapper.setRelationshipPrefix("http://dbtune.org/classical/resource/vocab/relationship/"); Chris@0: Chris@0: outstore->addPrefix("type", outmapper.getObjectTypePrefix()); Chris@1: outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "type/"); Chris@0: outstore->addPrefix("property", outmapper.getPropertyPrefix()); Chris@0: outstore->addPrefix("rel", outmapper.getRelationshipPrefix()); Chris@1: Chris@0: outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/"); Chris@0: outstore->addPrefix("mo", "http://purl.org/ontology/mo/"); Chris@0: outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/"); Chris@0: outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/"); Chris@0: outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#"); Chris@3: outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); Chris@3: outstore->addPrefix("db", "http://dbtune.org/musicbrainz/resource/"); Chris@3: outstore->addPrefix("dbv", "http://dbtune.org/musicbrainz/resource/vocab/"); Chris@3: outstore->addPrefix("cmn", "http://purl.org/ontology/classicalmusicnav#"); Chris@3: outstore->addPrefix("sim", "http://purl.org/ontology/similarity/"); Chris@0: Chris@1: outmapper.addTypeMapping("ClassicalData::Composer", "classical:Composer"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composer", "pages", "foaf:page"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composer", "name", "foaf:name"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", "dbv:alias"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composer", "birth", "property:birth"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composer", "death", "property:death"); Chris@4: outmapper.addPropertyMapping("ClassicalData::Composer", "geonameURIs", "foaf:based_near"); Chris@0: Chris@1: outmapper.addTypeMapping("ClassicalData::Birth", "bio:Birth"); Chris@1: outmapper.addTypeMapping("ClassicalData::Death", "bio:Death"); Chris@4: outmapper.addTypeUriPrefixMapping("ClassicalData::Birth", ":event/"); Chris@4: outmapper.addTypeUriPrefixMapping("ClassicalData::Death", ":event/"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Birth", "year", "bio:date"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Death", "year", "bio:date"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Birth", "place", "bio:place"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Death", "place", "bio:place"); Chris@0: Chris@1: outmapper.addTypeMapping("ClassicalData::Document", "foaf:Document"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Document", "topic", "foaf:primaryTopic"); Chris@0: Chris@1: outmapper.addTypeMapping("ClassicalData::Work", "mo:MusicalWork"); Chris@1: Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "composition", "mo:composed_in"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "opus", "mo:opus"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "catalogue", "mo:catalogue"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "number", "mo:number"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "partOf", "dc:isPartOf"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "parts", "dc:hasPart"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "pages", "foaf:page"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "forms", "property:form"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "key", "mo:key"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "aliases", "dbv:alias"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Work", "name", "dc:title"); Chris@1: Chris@1: outmapper.addTypeMapping("ClassicalData::Composition", "mo:Composition"); Chris@4: outmapper.addTypeUriPrefixMapping("ClassicalData::Composition", ":event/"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composition", "composer", "mo:composer"); Chris@1: outmapper.addPropertyMapping("ClassicalData::Composition", "works", "mo:produced_work"); Chris@1: Chris@1: outstore->add(Triple("classical:Composer", "a", outstore->expand("owl:Class"))); Chris@1: outstore->add(Triple("classical:Composer", "rdfs:subClassOf", outstore->expand("mo:MusicArtist"))); Chris@1: Chris@1: outstore->add(Triple("property:birth", "a", outstore->expand("owl:ObjectProperty"))); Chris@1: outstore->add(Triple("property:birth", "rdfs:subPropertyOf", outstore->expand("bio:event"))); Chris@1: Chris@1: outstore->add(Triple("property:death", "a", outstore->expand("owl:ObjectProperty"))); Chris@1: outstore->add(Triple("property:death", "rdfs:subPropertyOf", outstore->expand("bio:event"))); Chris@0: Chris@0: QList importers = parentObject->findChildren(); Chris@0: std::cerr << "have " << importers.size() << " importers" << std::endl; Chris@0: Chris@0: ComposerMap composers; Chris@0: Chris@0: QList dated; Chris@0: QList undated; Chris@0: Chris@0: QList works; Chris@0: QList compositions; Chris@0: QList other; Chris@0: Chris@0: foreach (Importer *importer, importers) { Chris@0: QObjectList objects = importer->getImportedObjects(); Chris@0: foreach (QObject *o, objects) { Chris@0: Composer *c; Chris@0: if ((c = qobject_cast(o))) { Chris@0: addMiscExpansions(c); Chris@0: asciify(c); Chris@0: if (c->birth() || c->death()) dated.push_back(c); Chris@0: else undated.push_back(c); Chris@0: continue; Chris@0: } Chris@0: Work *w; Chris@0: if ((w = qobject_cast(o))) { Chris@0: asciify(w); Chris@0: works.push_back(w); Chris@0: continue; Chris@0: } Chris@0: Composition *cn; Chris@0: if ((cn = qobject_cast(o))) { Chris@0: compositions.push_back(cn); Chris@0: continue; Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: // get all the dated composers merged before attempting to match Chris@0: // the undated ones Chris@0: foreach (Composer *c, dated) { Chris@0: mergeComposer(c, composers); Chris@0: } Chris@0: foreach (Composer *c, undated) { Chris@0: mergeComposer(c, composers); Chris@0: } Chris@0: Chris@0: QObjectList toStore; Chris@0: Chris@0: QSet cset; Chris@0: for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) { Chris@0: foreach (Composer *c, i.value()) { Chris@0: if (!cset.contains(c)) { Chris@0: assignUri(outstore, c); Chris@0: toStore.push_back(c); Chris@0: cset.insert(c); Chris@0: } Chris@0: foreach (Document *d, c->pages()) { Chris@0: QString s = d->uri().toString(); Chris@0: addDbpediaResource(outstore, c, s); Chris@0: } Chris@0: } Chris@0: } Chris@0: Chris@0: QSet storedUris; Chris@0: Chris@0: foreach (Work *w, works) { Chris@0: Composition *cn = w->composition(); Chris@0: if (!cn) continue; Chris@0: if (!cn->composer()) { Chris@0: QString cname = cn->composerName(); Chris@5: QString key = makeNameKey(cname); Chris@0: if (cname != "") { Chris@5: if (!composers.contains(key)) { Chris@0: DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; Chris@0: } else { Chris@5: QSet cs = composers[key]; Chris@0: if (cs.empty()) { Chris@0: DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; Chris@0: } else if (cs.size() > 1) { Chris@0: DEBUG << "Failed to assign Composition to composer: " Chris@0: << cs.size() << " composers match name " << cname << endl; Chris@0: } else { Chris@0: cn->setComposer(*cs.begin()); Chris@0: } Chris@0: } Chris@0: } else { Chris@0: DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl; Chris@0: } Chris@0: } Chris@0: Chris@0: if (cn->composer()) { Chris@0: assignUri(outstore, w, cn->composer()); Chris@0: } Chris@0: Chris@0: foreach (Document *d, w->pages()) { Chris@0: QString s = d->uri().toString(); Chris@0: addDbpediaResource(outstore, w, s); Chris@1: if (!storedUris.contains(s)) { Chris@1: toStore.push_back(d); Chris@1: storedUris.insert(s); Chris@1: } Chris@0: } Chris@0: Chris@0: QString u = w->property("uri").toUrl().toString(); Chris@0: if (u == "" || !storedUris.contains(u)) { Chris@0: toStore.push_back(w); Chris@0: if (u != "") storedUris.insert(u); Chris@0: } Chris@0: } Chris@0: Chris@0: try { Chris@0: outmapper.storeAllObjects(toStore); Chris@0: Chris@0: } catch (RDFException e) { Chris@0: std::cerr << "Caught RDF exception: " << e.what() << std::endl; Chris@0: } Chris@0: Chris@0: DEBUG << "Stored, now saving" << endl; Chris@0: Chris@2: outstore->save("imported.ttl"); Chris@0: Chris@0: DEBUG << "Saved" << endl; Chris@0: Chris@0: Chris@0: QMultiMap cmap; Chris@0: foreach (Composer *c, cset) { Chris@0: QString n = c->getSortName(true); Chris@0: cmap.insert(n, c); Chris@0: } Chris@0: Chris@0: std::cout << "Composers: " << cmap.size() << std::endl; Chris@0: Chris@0: for (QMultiMap::iterator i = cmap.begin(); Chris@0: i != cmap.end(); ++i) { Chris@0: Chris@0: QString n = i.key(); Chris@0: Composer *c = i.value(); Chris@0: Chris@0: std::cout << n.toStdString(); Chris@0: Chris@0: QString d = c->getDisplayDates(); Chris@0: if (d != "") std::cout << " (" << d.toStdString() << ")"; Chris@0: std::cout << std::endl; Chris@0: } Chris@0: Chris@0: std::cout << std::endl; Chris@0: Chris@0: std::cout << "Works by composer:" << std::endl; Chris@0: Chris@0: for (QMultiMap::iterator i = cmap.begin(); Chris@0: i != cmap.end(); ++i) { Chris@0: Chris@0: QString n = i.key(); Chris@0: Composer *c = i.value(); Chris@0: Chris@0: std::set wmap; Chris@0: foreach (Work *w, works) { Chris@0: Composition *cn = w->composition(); Chris@0: if (!cn) continue; Chris@0: if (cn->composer() != c) continue; Chris@0: if (w->partOf()) continue; Chris@0: wmap.insert(w); Chris@0: } Chris@0: Chris@0: if (wmap.empty()) continue; Chris@0: Chris@0: std::cout << n.toStdString() << std::endl; Chris@0: Chris@0: foreach (Work *w, wmap) { Chris@0: std::cout << " * "; Chris@0: std::cout << w->name().toStdString(); Chris@0: if (w->catalogue() != "") { Chris@0: std::cout << " [" << w->catalogue().toStdString() << "]"; Chris@0: } Chris@0: if (w->opus() != "") { Chris@0: std::cout << " [op. " << w->opus().toStdString() << "]"; Chris@0: } Chris@0: std::cout << std::endl; Chris@0: std::set orderedParts; Chris@0: foreach (Work *ww, w->parts()) { Chris@0: orderedParts.insert(ww); Chris@0: } Chris@0: foreach (Work *ww, orderedParts) { Chris@0: std::cout << " "; Chris@0: if (ww->number() != "") { Chris@0: std::cout << ww->number().toStdString() << ". "; Chris@0: } Chris@0: std::cout << ww->name().toStdString(); Chris@0: if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) { Chris@0: std::cout << " [" << ww->catalogue().toStdString() << "]"; Chris@0: } Chris@0: if (ww->opus() != "" && ww->opus() != w->opus()) { Chris@0: std::cout << " [op. " << ww->opus().toStdString() << "]"; Chris@0: } Chris@0: std::cout << std::endl; Chris@0: } Chris@0: } Chris@0: Chris@0: std::cout << std::endl; Chris@0: } Chris@0: Chris@0: delete outstore; Chris@0: Chris@0: DEBUG << "Done" << endl; Chris@0: Chris@0: Chris@0: } Chris@0: Chris@0: