annotate import/Import.cpp @ 4:719a4f477098 classical-rdf

* Add Classical Archives composer list importer; run it
author Chris Cannam
date Thu, 10 Dec 2009 15:15:40 +0000
parents 62324586b8d0
children d23a4c935a22
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 #include "Objects.h"
Chris@0 4
Chris@0 5 #include <dataquay/BasicStore.h>
Chris@0 6 #include <dataquay/RDFException.h>
Chris@0 7 #include <dataquay/objectmapper/ObjectMapper.h>
Chris@0 8 #include <dataquay/objectmapper/ObjectBuilder.h>
Chris@0 9 #include <dataquay/objectmapper/ContainerBuilder.h>
Chris@0 10
Chris@0 11 #include "ImportClassicalComposersOrg.h"
Chris@0 12 #include "ImportClassicalDotNet.h"
Chris@4 13 #include "ImportClassicalArchives.h"
Chris@0 14 #include "ImportWikipediaComposers.h"
Chris@0 15 #include "ImportWikipediaWorks.h"
Chris@0 16 #include "ImportWikipediaWorksK.h"
Chris@0 17 #include "ImportWikipediaWorksList.h"
Chris@0 18 #include "ImportHoboken.h"
Chris@0 19
Chris@0 20 #include <dataquay/Debug.h>
Chris@0 21
Chris@0 22 using namespace ClassicalData;
Chris@0 23 using namespace Dataquay;
Chris@0 24
Chris@0 25 #include <iostream>
Chris@0 26 #include <set>
Chris@0 27
Chris@0 28 typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers
Chris@0 29
Chris@0 30 void
Chris@0 31 addMiscExpansions(Composer *c)
Chris@0 32 {
Chris@0 33 QString n = c->name();
Chris@0 34
Chris@0 35 DEBUG << "addMiscExpansions: n = " << n << endl;
Chris@0 36
Chris@0 37 // lovely hard-coded special cases go here! some of these are
Chris@0 38 // needed for works->composer assignments
Chris@0 39 if (n == "Balakirev, Milii") {
Chris@0 40 c->addAlias("Mily Balakirev");
Chris@0 41 }
Chris@0 42 if (n.startsWith("Cui, C")) {
Chris@0 43 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
Chris@0 44 }
Chris@0 45 if (n == "Handel, George Frideric") {
Chris@0 46 c->addAlias("Handel, Georg Friedrich");
Chris@0 47 c->addAlias("Handel");
Chris@0 48 }
Chris@1 49 if (n == "Prokofiev, Sergey") {
Chris@1 50 c->addAlias("Prokofieff, Sergei");
Chris@1 51 c->addAlias("Sergei Prokofieff");
Chris@1 52 }
Chris@1 53 if (n == "Rossini, Gioacchino") {
Chris@1 54 c->addAlias("Rossini, Gioachino");
Chris@1 55 c->addAlias("Gioachino Rossini");
Chris@1 56 }
Chris@1 57 if (n == "Edwards, Richard") {
Chris@1 58 c->addAlias("Edwardes, Richard");
Chris@1 59 c->addAlias("Richard Edwardes");
Chris@1 60 c->addAlias("Richard Edwards");
Chris@1 61 }
Chris@1 62 if (n == "Rimsky-Korsakov, Nikolay Andreyevich") {
Chris@1 63 c->addAlias("Nikolai Rimsky-Korsakov");
Chris@1 64 }
Chris@1 65 if (n.startsWith("Piccinni, Nico")) {
Chris@1 66 c->addAlias(n);
Chris@1 67 c->setName(QString::fromUtf8("Piccinni, Niccol\303\262"));
Chris@1 68 }
Chris@1 69 if (n == "Tchaikovsky, Pyotr Ilyich") {
Chris@1 70 c->addAlias("Tchaikovsky, Piotr Ilyitch");
Chris@1 71 }
Chris@1 72 if (n == "Wilhelm Stenhammar") {
Chris@1 73 c->addAlias("Stenhammar, Vilhelm Eugene");
Chris@1 74 c->setName("Stenhammar, Wilhelm");
Chris@1 75 c->addAlias(n);
Chris@1 76 }
Chris@1 77 if (n == "Mercadante, Saverio Rafaele") {
Chris@1 78 c->addAlias("Mercadante, Giuseppe");
Chris@1 79 }
Chris@1 80 if (n == "Johann Wenzel Anton Stamitz") {
Chris@1 81 c->addAlias(n);
Chris@1 82 c->setName("Stamitz, Johann Wenzel Anton");
Chris@1 83 c->addAlias("Stamitz, Jan Vaclav");
Chris@1 84 }
Chris@1 85 if (n == "Mario Castelnuovo-Tedesco") {
Chris@1 86 c->addAlias("Castelnuovo Tedesco, Mario");
Chris@1 87 }
Chris@0 88 if (n == "Mayr, Simon") {
Chris@0 89 c->addAlias("Mayr");
Chris@0 90 }
Chris@0 91
Chris@0 92 n.replace(", Sr.", " Sr.");
Chris@0 93 n.replace(", Jr.", " Jr.");
Chris@0 94
Chris@0 95 int comma = n.indexOf(", ");
Chris@0 96 if (comma > 0 && comma + 2 < n.length()) {
Chris@0 97
Chris@0 98 QString left = n.left(comma);
Chris@0 99 QString right = n.right(n.length() - comma - 2);
Chris@0 100
Chris@0 101 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
Chris@0 102 if (jrsr.indexIn(right) >= 0) {
Chris@0 103 left = left + jrsr.cap(1);
Chris@0 104 right = right.left(right.length()-jrsr.matchedLength());
Chris@0 105 }
Chris@0 106 n = right + " " + left;
Chris@0 107 }
Chris@0 108
Chris@0 109 if (n != c->name()) c->addAlias(n);
Chris@0 110
Chris@0 111 if (n.contains("Sergey")) {
Chris@0 112 QString nn(n);
Chris@0 113 nn.replace("Sergey", "Sergei");
Chris@0 114 c->addAlias(nn);
Chris@1 115 } else if (n.contains("Sergei")) {
Chris@1 116 QString nn(n);
Chris@1 117 nn.replace("Sergei", "Sergey");
Chris@1 118 c->addAlias(nn);
Chris@0 119 }
Chris@0 120
Chris@0 121 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
Chris@0 122 if (sr.indexIn(n) >= 0) {
Chris@0 123 QString nr = n;
Chris@0 124 nr.replace(sr.pos(0), sr.matchedLength(), " I");
Chris@0 125 nr.replace(" ", " ");
Chris@0 126 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 127 c->addAlias(nr);
Chris@0 128 }
Chris@0 129 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
Chris@0 130 if (jr.indexIn(n) >= 0) {
Chris@0 131 QString nr = n;
Chris@0 132 nr.replace(jr.pos(0), jr.matchedLength(), " II");
Chris@0 133 nr.replace(" ", " ");
Chris@0 134 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 135 c->addAlias(nr);
Chris@0 136 }
Chris@0 137 QString nr = n;
Chris@0 138 nr.replace("(I)", "I");
Chris@0 139 nr.replace("(II)", "II");
Chris@0 140 nr.replace("(III)", "III");
Chris@0 141 c->addAlias(nr);
Chris@0 142 }
Chris@0 143
Chris@0 144 bool namesFuzzyMatch(QString an, Composer *b)
Chris@0 145 {
Chris@0 146 // ew!
Chris@0 147
Chris@0 148 QString bn = b->name();
Chris@0 149 if (bn == an) return true;
Chris@0 150 if (b->aliases().contains(an)) return true;
Chris@0 151 int aSurnameIndex = 0, bSurnameIndex = 0;
Chris@0 152 if (an.contains(",")) {
Chris@0 153 an.replace(",", "");
Chris@0 154 } else {
Chris@0 155 aSurnameIndex = -1;
Chris@0 156 }
Chris@0 157 if (bn.contains(",")) {
Chris@0 158 bn.replace(",", "");
Chris@0 159 } else {
Chris@0 160 bSurnameIndex = -1;
Chris@0 161 }
Chris@0 162 QStringList nl = an.split(QRegExp("[ -]"));
Chris@0 163 QStringList bnl = bn.split(QRegExp("[ -]"));
Chris@0 164 int matchCount = 0;
Chris@0 165 QString surnameMatch = "";
Chris@0 166 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
Chris@0 167 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
Chris@0 168 if (nl[aSurnameIndex][0].isUpper() &&
Chris@0 169 nl[aSurnameIndex] != "Della" &&
Chris@0 170 nl[aSurnameIndex] == bnl[bSurnameIndex]) {
Chris@0 171 surnameMatch = nl[aSurnameIndex];
Chris@0 172 }
Chris@0 173 foreach (QString elt, nl) {
Chris@0 174 if (!elt[0].isUpper() || elt == "Della") continue;
Chris@0 175 if (bnl.contains(elt)) {
Chris@0 176 ++matchCount;
Chris@0 177 continue;
Chris@0 178 }
Chris@0 179 }
Chris@0 180 if (matchCount > 1 && surnameMatch != "") {
Chris@0 181 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
Chris@0 182 return true;
Chris@0 183 }
Chris@0 184 return false;
Chris@0 185 }
Chris@0 186
Chris@0 187 bool
Chris@0 188 hasBetterName(Composer *c, Composer *other)
Chris@0 189 {
Chris@0 190 if (c->name() == other->name()) return false;
Chris@0 191
Chris@0 192 // Try to guess which of c and other is more likely to have a good
Chris@0 193 // "canonical form" of the composer's name
Chris@0 194
Chris@0 195 if (c->name().startsWith("van ")) {
Chris@0 196 return false; // wrong choice of sort for e.g. LvB; should be
Chris@0 197 // Beethoven, Ludwig van, not van Beethoven, Ludwig
Chris@0 198 }
Chris@0 199 if (other->name().startsWith("van ")) {
Chris@0 200 return true;
Chris@0 201 }
Chris@0 202
Chris@0 203 if (c->aliases().size() != other->aliases().size()) {
Chris@0 204 // a rather weak heuristic
Chris@0 205 return c->aliases().size() > other->aliases().size();
Chris@0 206 }
Chris@0 207
Chris@0 208 if (c->name().contains(',') && !other->name().contains(',')) {
Chris@0 209 // another rather weak heuristic
Chris@0 210 return true;
Chris@0 211 }
Chris@0 212
Chris@0 213 return false;
Chris@0 214 }
Chris@0 215
Chris@0 216 void mergeComposer(Composer *c, ComposerMap &composers)
Chris@0 217 {
Chris@0 218 QString name = c->name();
Chris@0 219
Chris@0 220 QSet<QString> allNames = c->aliases();
Chris@0 221 allNames.insert(name);
Chris@0 222
Chris@0 223 QString dates;
Chris@0 224 if (c->birth()) {
Chris@0 225 if (c->death()) {
Chris@0 226 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
Chris@0 227 } else {
Chris@0 228 dates = QString("%1-").arg(c->birth()->year());
Chris@0 229 }
Chris@0 230 }
Chris@0 231 if (dates != "") {
Chris@0 232 allNames.insert(dates);
Chris@0 233 }
Chris@0 234
Chris@0 235 QSet<Composer *> matches;
Chris@0 236
Chris@0 237 foreach (QString candidateName, allNames) {
Chris@0 238 QString key = candidateName.toLower();
Chris@0 239 if (composers.contains(key)) {
Chris@0 240 foreach (Composer *candidate, composers[key]) {
Chris@0 241 if (candidateName == dates) {
Chris@0 242 if (!namesFuzzyMatch(c->name(), candidate) &&
Chris@0 243 !namesFuzzyMatch(candidate->name(), c)) {
Chris@0 244 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
Chris@0 245 continue;
Chris@0 246 } else {
Chris@0 247 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
Chris@0 248 }
Chris@0 249 } else {
Chris@1 250 if (!c->datesMatch(candidate)) {
Chris@0 251 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
Chris@0 252 continue;
Chris@0 253 }
Chris@0 254 }
Chris@0 255 matches.insert(candidate);
Chris@0 256 }
Chris@0 257 }
Chris@0 258 }
Chris@0 259
Chris@0 260 if (matches.empty()) {
Chris@0 261 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
Chris@0 262
Chris@0 263 if (!c->birth() && !c->death()) {
Chris@0 264 // laboriously look for fuzzy match across _all_ composers
Chris@0 265 for (ComposerMap::iterator i = composers.begin();
Chris@0 266 i != composers.end(); ++i) {
Chris@0 267 foreach (Composer *candidate, *i) {
Chris@0 268 if (namesFuzzyMatch(c->name(), candidate)) {
Chris@0 269 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
Chris@0 270 matches.insert(candidate);
Chris@0 271 break;
Chris@0 272 }
Chris@0 273 }
Chris@0 274 if (!matches.empty()) break;
Chris@0 275 }
Chris@0 276 }
Chris@0 277
Chris@0 278 if (matches.empty()) {
Chris@0 279 foreach (QString candidateName, allNames) {
Chris@0 280 composers[candidateName.toLower()].insert(c);
Chris@0 281 DEBUG << "added for alias or date " << candidateName << endl;
Chris@0 282 }
Chris@0 283 return;
Chris@0 284 }
Chris@0 285 }
Chris@0 286
Chris@0 287 if (matches.size() > 1) {
Chris@0 288 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
Chris@0 289 }
Chris@0 290
Chris@0 291 Composer *other = *matches.begin();
Chris@0 292
Chris@0 293 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
Chris@0 294
Chris@0 295 if (hasBetterName(c, other)) {
Chris@0 296 other->addAlias(other->name());
Chris@0 297 other->setName(c->name());
Chris@0 298 } else {
Chris@0 299 other->addAlias(c->name());
Chris@0 300 }
Chris@0 301 composers[c->name().toLower()].insert(other);
Chris@0 302 DEBUG << "linking from alias " << c->name() << endl;
Chris@0 303
Chris@0 304 foreach (QString alias, c->aliases()) {
Chris@0 305 if (alias != other->name() &&
Chris@0 306 !other->aliases().contains(alias)) {
Chris@0 307 other->addAlias(alias);
Chris@0 308 composers[alias.toLower()].insert(other);
Chris@0 309 DEBUG << "linking from alias " << alias << endl;
Chris@0 310 }
Chris@0 311 }
Chris@0 312
Chris@0 313 foreach (Document *d, c->pages()) {
Chris@0 314 bool found = false;
Chris@0 315 foreach (Document *dd, other->pages()) {
Chris@0 316 if (d->uri() == dd->uri()) {
Chris@0 317 found = true;
Chris@0 318 break;
Chris@0 319 }
Chris@0 320 }
Chris@0 321 if (!found) {
Chris@0 322 d->setTopic(other);
Chris@0 323 other->addPage(d);
Chris@0 324 }
Chris@0 325 }
Chris@0 326
Chris@0 327 //!!! actually the "approximate" bits of the following are bogus;
Chris@0 328 // a source reporting birth or death date as approx is probably
Chris@0 329 // more accurate than one reporting an exact date
Chris@0 330
Chris@0 331 if (c->birth()) {
Chris@0 332 if (!other->birth() || other->birth()->approximate()) {
Chris@0 333 other->setBirth(c->birth());
Chris@0 334 }
Chris@0 335 }
Chris@0 336
Chris@0 337 if (c->death()) {
Chris@0 338 if (!other->death() || other->death()->approximate()) {
Chris@0 339 other->setDeath(c->death());
Chris@0 340 }
Chris@0 341 }
Chris@0 342
Chris@0 343 if (c->gender() != "") other->setGender(c->gender());
Chris@4 344
Chris@4 345 foreach (QString s, c->nationality()) {
Chris@4 346 other->addNationality(s);
Chris@4 347 }
Chris@4 348
Chris@4 349 foreach (QUrl s, c->geonameURIs()) {
Chris@4 350 other->addGeonameURI(s);
Chris@4 351 }
Chris@4 352
Chris@0 353 if (c->remarks() != "") other->setRemarks(c->remarks());
Chris@0 354 if (c->period() != "") other->setPeriod(c->period());
Chris@0 355
Chris@0 356 }
Chris@0 357
Chris@0 358 QString
Chris@0 359 asciify(QString field)
Chris@0 360 {
Chris@0 361 // accented characters etc -- add "ascii version" for dumb search purposes
Chris@0 362 QString ascii;
Chris@0 363 for (int i = 0; i < field.length(); ++i) {
Chris@0 364 QString dc = field[i].decomposition();
Chris@0 365 if (dc != "") ascii += dc[0];
Chris@0 366 else if (field[i] == QChar(0x00DF)) {
Chris@0 367 ascii += "ss";
Chris@0 368 } else {
Chris@0 369 ascii += field[i];
Chris@0 370 }
Chris@0 371 }
Chris@0 372 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
Chris@0 373 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
Chris@0 374 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
Chris@0 375 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
Chris@0 376 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
Chris@0 377 return ascii;
Chris@0 378 }
Chris@0 379
Chris@0 380 void
Chris@0 381 asciify(Composer *c)
Chris@0 382 {
Chris@0 383 QString n = c->name();
Chris@0 384 QString asc = asciify(n);
Chris@0 385 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 386 foreach (QString alias, c->aliases()) {
Chris@0 387 asc = asciify(alias);
Chris@0 388 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 389 }
Chris@0 390 }
Chris@0 391
Chris@0 392 void
Chris@0 393 asciify(Work *w)
Chris@0 394 {
Chris@0 395 QString n = w->name();
Chris@0 396 QString asc = asciify(n);
Chris@0 397 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 398 foreach (QString alias, w->aliases()) {
Chris@0 399 asc = asciify(alias);
Chris@0 400 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 401 }
Chris@0 402 }
Chris@0 403
Chris@0 404 void
Chris@0 405 assignUri(Store *s, Composer *c)
Chris@0 406 {
Chris@0 407 static QSet<QString> convSet;
Chris@0 408 QString conv = c->name();
Chris@0 409 if (!conv.contains(",")) {
Chris@0 410 QStringList sl = conv.split(" ");
Chris@0 411 if (!sl.empty()) {
Chris@0 412 sl.push_front(sl[sl.size()-1]);
Chris@0 413 sl.removeLast();
Chris@0 414 conv = sl.join(" ");
Chris@0 415 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
Chris@0 416 }
Chris@0 417 }
Chris@0 418 conv = asciify(conv);
Chris@0 419 conv.replace(" ", "_");
Chris@0 420 conv.replace("-", "_");
Chris@0 421 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@0 422 conv = conv.toLower();
Chris@0 423 QString initial = conv;
Chris@1 424 int i = 2;
Chris@0 425 while (convSet.contains(conv)) {
Chris@0 426 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 427 i++;
Chris@0 428 }
Chris@0 429 convSet.insert(conv);
Chris@1 430 c->setProperty("uri", s->expand(":composer/" + conv));
Chris@0 431 }
Chris@0 432
Chris@0 433 void
Chris@0 434 assignUri(Store *s, Work *w, Composer *c)
Chris@0 435 {
Chris@0 436 QString pfx = c->property("uri").toUrl().toString();
Chris@0 437 DEBUG << "pfx = " << pfx << endl;
Chris@2 438 if (!pfx.contains("composer/")) pfx = ":work/";
Chris@2 439 else {
Chris@2 440 pfx.replace("composer/", "work/");
Chris@2 441 pfx += "/";
Chris@2 442 }
Chris@0 443
Chris@0 444 static QSet<QString> convSet;
Chris@1 445
Chris@0 446 QString conv = w->catalogue();
Chris@0 447 if (conv == "") conv = w->opus();
Chris@0 448 conv = conv.replace(".", "");
Chris@0 449 bool hasOpus = (conv != "");
Chris@1 450 if (conv == "") conv = w->name().toLower();
Chris@0 451 if (w->number() != "") conv = conv + "_no" + w->number();
Chris@0 452 conv = asciify(conv);
Chris@0 453 conv.replace(" ", "_");
Chris@0 454 conv.replace("-", "_");
Chris@0 455 conv.replace(":", "_");
Chris@0 456 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@1 457
Chris@2 458 if (pfx != "") conv = pfx + conv;
Chris@1 459
Chris@0 460 // I think actually for works we want to merge duplicates rather than
Chris@0 461 // assign them separate URIs, _unless_ they lack a viable opus number
Chris@0 462 if (!hasOpus) {
Chris@0 463 QString initial = conv;
Chris@1 464 int i = 2;
Chris@0 465 while (convSet.contains(conv)) {
Chris@0 466 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 467 i++;
Chris@0 468 }
Chris@0 469 }
Chris@0 470 convSet.insert(conv);
Chris@1 471
Chris@1 472 w->setProperty("uri", conv);
Chris@0 473 }
Chris@0 474
Chris@0 475 void
Chris@0 476 addDbpediaResource(Store *store, QObject *o, QString s)
Chris@0 477 {
Chris@0 478 QUrl u = o->property("uri").toUrl();
Chris@0 479 if (u == QUrl()) return;
Chris@0 480 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
Chris@0 481 store->add(Triple(u,
Chris@0 482 "mo:wikipedia",
Chris@0 483 QUrl(s)));
Chris@0 484 s.replace("http://en.wikipedia.org/wiki/",
Chris@0 485 "http://dbpedia.org/resource/");
Chris@0 486 store->add(Triple(u,
Chris@0 487 "owl:sameAs",
Chris@0 488 QUrl(s)));
Chris@0 489 }
Chris@0 490 }
Chris@0 491
Chris@0 492 int main(int argc, char **argv)
Chris@0 493 {
Chris@0 494 qRegisterMetaType<HistoricalEvent *>
Chris@0 495 ("ClassicalData::HistoricalEvent*");
Chris@0 496 qRegisterMetaType<Birth *>
Chris@0 497 ("ClassicalData::Birth*");
Chris@0 498 qRegisterMetaType<Death *>
Chris@0 499 ("ClassicalData::Death*");
Chris@0 500 qRegisterMetaType<Composition *>
Chris@0 501 ("ClassicalData::Composition*");
Chris@0 502 qRegisterMetaType<Work *>
Chris@0 503 ("ClassicalData::Work*");
Chris@0 504 qRegisterMetaType<Movement *>
Chris@0 505 ("ClassicalData::Movement*");
Chris@0 506 qRegisterMetaType<Composer *>
Chris@0 507 ("ClassicalData::Composer*");
Chris@0 508 qRegisterMetaType<Document *>
Chris@0 509 ("ClassicalData::Document*");
Chris@0 510 qRegisterMetaType<Form *>
Chris@0 511 ("ClassicalData::Form*");
Chris@0 512 qRegisterMetaType<QSet<Work *> >
Chris@0 513 ("QSet<ClassicalData::Work*>");
Chris@0 514 qRegisterMetaType<QSet<Movement *> >
Chris@0 515 ("QSet<ClassicalData::Movement*>");
Chris@0 516 qRegisterMetaType<QSet<Document *> >
Chris@0 517 ("QSet<ClassicalData::Document*>");
Chris@0 518 qRegisterMetaType<QSet<Form *> >
Chris@0 519 ("QSet<ClassicalData::Form*>");
Chris@0 520 qRegisterMetaType<QSet<QString> >
Chris@0 521 ("QSet<QString>");
Chris@4 522 qRegisterMetaType<QSet<QUrl> >
Chris@4 523 ("QSet<QUrl>");
Chris@0 524
Chris@0 525 qRegisterMetaType<ClassicalComposersOrgImporter *>
Chris@0 526 ("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 527 qRegisterMetaType<ClassicalDotNetImporter *>
Chris@0 528 ("ClassicalData::ClassicalDotNetImporter*");
Chris@4 529 qRegisterMetaType<ClassicalArchivesImporter *>
Chris@4 530 ("ClassicalData::ClassicalArchivesImporter*");
Chris@0 531 qRegisterMetaType<WikipediaComposersImporter *>
Chris@0 532 ("ClassicalData::WikipediaComposersImporter*");
Chris@0 533 qRegisterMetaType<WikipediaWorksImporter *>
Chris@0 534 ("ClassicalData::WikipediaWorksImporter*");
Chris@0 535 qRegisterMetaType<WikipediaWorksKImporter *>
Chris@0 536 ("ClassicalData::WikipediaWorksKImporter*");
Chris@0 537 qRegisterMetaType<WikipediaWorksListImporter *>
Chris@0 538 ("ClassicalData::WikipediaWorksListImporter*");
Chris@0 539 qRegisterMetaType<HobokenImporter *>
Chris@0 540 ("ClassicalData::HobokenImporter*");
Chris@0 541
Chris@0 542 ObjectBuilder::getInstance()->registerClass
Chris@0 543 <HistoricalEvent>("ClassicalData::HistoricalEvent*");
Chris@0 544 ObjectBuilder::getInstance()->registerClass
Chris@0 545 <Birth>("ClassicalData::Birth*");
Chris@0 546 ObjectBuilder::getInstance()->registerClass
Chris@0 547 <Death>("ClassicalData::Death*");
Chris@0 548 ObjectBuilder::getInstance()->registerClass
Chris@0 549 <Composition>("ClassicalData::Composition*");
Chris@0 550 ObjectBuilder::getInstance()->registerClass
Chris@0 551 <Work, QObject>("ClassicalData::Work*");
Chris@0 552 ObjectBuilder::getInstance()->registerClass
Chris@0 553 <Movement, QObject>("ClassicalData::Movement*");
Chris@0 554 ObjectBuilder::getInstance()->registerClass
Chris@0 555 <Composer, QObject>("ClassicalData::Composer*");
Chris@0 556 ObjectBuilder::getInstance()->registerClass
Chris@0 557 <Document, QObject>("ClassicalData::Document*");
Chris@0 558 ObjectBuilder::getInstance()->registerClass
Chris@0 559 <Form, QObject>("ClassicalData::Form*");
Chris@0 560
Chris@0 561 ObjectBuilder::getInstance()->registerClass
Chris@0 562 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 563 ObjectBuilder::getInstance()->registerClass
Chris@0 564 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
Chris@0 565 ObjectBuilder::getInstance()->registerClass
Chris@4 566 <ClassicalArchivesImporter>("ClassicalData::ClassicalArchivesImporter*");
Chris@4 567 ObjectBuilder::getInstance()->registerClass
Chris@0 568 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
Chris@0 569 ObjectBuilder::getInstance()->registerClass
Chris@0 570 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
Chris@0 571 ObjectBuilder::getInstance()->registerClass
Chris@0 572 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
Chris@0 573 ObjectBuilder::getInstance()->registerClass
Chris@0 574 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
Chris@0 575 ObjectBuilder::getInstance()->registerClass
Chris@0 576 <HobokenImporter>("ClassicalData::HobokenImporter*");
Chris@0 577
Chris@0 578 ContainerBuilder::getInstance()->registerContainer
Chris@0 579 <QString, QSet<QString> >
Chris@0 580 ("QString", "QSet<QString>", ContainerBuilder::SetKind);
Chris@0 581
Chris@0 582 ContainerBuilder::getInstance()->registerContainer
Chris@4 583 <QUrl, QSet<QUrl> >
Chris@4 584 ("QUrl", "QSet<QUrl>", ContainerBuilder::SetKind);
Chris@4 585
Chris@4 586 ContainerBuilder::getInstance()->registerContainer
Chris@0 587 <Work*, QSet<Work*> >
Chris@0 588 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>",
Chris@0 589 ContainerBuilder::SetKind);
Chris@0 590
Chris@0 591 ContainerBuilder::getInstance()->registerContainer
Chris@0 592 <Movement*, QSet<Movement*> >
Chris@0 593 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>",
Chris@0 594 ContainerBuilder::SetKind);
Chris@0 595
Chris@0 596 ContainerBuilder::getInstance()->registerContainer
Chris@0 597 <Document*, QSet<Document*> >
Chris@0 598 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>",
Chris@0 599 ContainerBuilder::SetKind);
Chris@0 600
Chris@0 601 ContainerBuilder::getInstance()->registerContainer
Chris@0 602 <Form*, QSet<Form*> >
Chris@0 603 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>",
Chris@0 604 ContainerBuilder::SetKind);
Chris@0 605
Chris@0 606 BasicStore *store = BasicStore::load("file:importers.ttl");
Chris@0 607 ObjectMapper mapper(store);
Chris@0 608 QObject *parentObject = mapper.loadAllObjects(new QObject());
Chris@0 609
Chris@0 610 BasicStore *outstore = new BasicStore();
Chris@1 611 outstore->setBaseUri("http://dbtune.org/classical/resource/");
Chris@0 612 ObjectMapper outmapper(outstore);
Chris@0 613
Chris@0 614 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
Chris@3 615 outmapper.setObjectStorePolicy(ObjectMapper::StoreAllObjects);
Chris@3 616 outmapper.setBlankNodePolicy(ObjectMapper::NoBlankNodes);
Chris@1 617
Chris@1 618 outmapper.setObjectTypePrefix("http://dbtune.org/classical/resource/");
Chris@1 619 outmapper.setPropertyPrefix("http://dbtune.org/classical/resource/vocab/");
Chris@1 620 outmapper.setRelationshipPrefix("http://dbtune.org/classical/resource/vocab/relationship/");
Chris@0 621
Chris@0 622 outstore->addPrefix("type", outmapper.getObjectTypePrefix());
Chris@1 623 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "type/");
Chris@0 624 outstore->addPrefix("property", outmapper.getPropertyPrefix());
Chris@0 625 outstore->addPrefix("rel", outmapper.getRelationshipPrefix());
Chris@1 626
Chris@0 627 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/");
Chris@0 628 outstore->addPrefix("mo", "http://purl.org/ontology/mo/");
Chris@0 629 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/");
Chris@0 630 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/");
Chris@0 631 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#");
Chris@3 632 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
Chris@3 633 outstore->addPrefix("db", "http://dbtune.org/musicbrainz/resource/");
Chris@3 634 outstore->addPrefix("dbv", "http://dbtune.org/musicbrainz/resource/vocab/");
Chris@3 635 outstore->addPrefix("cmn", "http://purl.org/ontology/classicalmusicnav#");
Chris@3 636 outstore->addPrefix("sim", "http://purl.org/ontology/similarity/");
Chris@0 637
Chris@1 638 outmapper.addTypeMapping("ClassicalData::Composer", "classical:Composer");
Chris@1 639 outmapper.addPropertyMapping("ClassicalData::Composer", "pages", "foaf:page");
Chris@1 640 outmapper.addPropertyMapping("ClassicalData::Composer", "name", "foaf:name");
Chris@1 641 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", "dbv:alias");
Chris@1 642 outmapper.addPropertyMapping("ClassicalData::Composer", "birth", "property:birth");
Chris@1 643 outmapper.addPropertyMapping("ClassicalData::Composer", "death", "property:death");
Chris@4 644 outmapper.addPropertyMapping("ClassicalData::Composer", "geonameURIs", "foaf:based_near");
Chris@0 645
Chris@1 646 outmapper.addTypeMapping("ClassicalData::Birth", "bio:Birth");
Chris@1 647 outmapper.addTypeMapping("ClassicalData::Death", "bio:Death");
Chris@4 648 outmapper.addTypeUriPrefixMapping("ClassicalData::Birth", ":event/");
Chris@4 649 outmapper.addTypeUriPrefixMapping("ClassicalData::Death", ":event/");
Chris@1 650 outmapper.addPropertyMapping("ClassicalData::Birth", "year", "bio:date");
Chris@1 651 outmapper.addPropertyMapping("ClassicalData::Death", "year", "bio:date");
Chris@1 652 outmapper.addPropertyMapping("ClassicalData::Birth", "place", "bio:place");
Chris@1 653 outmapper.addPropertyMapping("ClassicalData::Death", "place", "bio:place");
Chris@0 654
Chris@1 655 outmapper.addTypeMapping("ClassicalData::Document", "foaf:Document");
Chris@1 656 outmapper.addPropertyMapping("ClassicalData::Document", "topic", "foaf:primaryTopic");
Chris@0 657
Chris@1 658 outmapper.addTypeMapping("ClassicalData::Work", "mo:MusicalWork");
Chris@1 659
Chris@1 660 outmapper.addPropertyMapping("ClassicalData::Work", "composition", "mo:composed_in");
Chris@1 661 outmapper.addPropertyMapping("ClassicalData::Work", "opus", "mo:opus");
Chris@1 662 outmapper.addPropertyMapping("ClassicalData::Work", "catalogue", "mo:catalogue");
Chris@1 663 outmapper.addPropertyMapping("ClassicalData::Work", "number", "mo:number");
Chris@1 664 outmapper.addPropertyMapping("ClassicalData::Work", "partOf", "dc:isPartOf");
Chris@1 665 outmapper.addPropertyMapping("ClassicalData::Work", "parts", "dc:hasPart");
Chris@1 666 outmapper.addPropertyMapping("ClassicalData::Work", "pages", "foaf:page");
Chris@1 667 outmapper.addPropertyMapping("ClassicalData::Work", "forms", "property:form");
Chris@1 668 outmapper.addPropertyMapping("ClassicalData::Work", "key", "mo:key");
Chris@1 669 outmapper.addPropertyMapping("ClassicalData::Work", "aliases", "dbv:alias");
Chris@1 670 outmapper.addPropertyMapping("ClassicalData::Work", "name", "dc:title");
Chris@1 671
Chris@1 672 outmapper.addTypeMapping("ClassicalData::Composition", "mo:Composition");
Chris@4 673 outmapper.addTypeUriPrefixMapping("ClassicalData::Composition", ":event/");
Chris@1 674 outmapper.addPropertyMapping("ClassicalData::Composition", "composer", "mo:composer");
Chris@1 675 outmapper.addPropertyMapping("ClassicalData::Composition", "works", "mo:produced_work");
Chris@1 676
Chris@1 677 outstore->add(Triple("classical:Composer", "a", outstore->expand("owl:Class")));
Chris@1 678 outstore->add(Triple("classical:Composer", "rdfs:subClassOf", outstore->expand("mo:MusicArtist")));
Chris@1 679
Chris@1 680 outstore->add(Triple("property:birth", "a", outstore->expand("owl:ObjectProperty")));
Chris@1 681 outstore->add(Triple("property:birth", "rdfs:subPropertyOf", outstore->expand("bio:event")));
Chris@1 682
Chris@1 683 outstore->add(Triple("property:death", "a", outstore->expand("owl:ObjectProperty")));
Chris@1 684 outstore->add(Triple("property:death", "rdfs:subPropertyOf", outstore->expand("bio:event")));
Chris@0 685
Chris@0 686 QList<Importer *> importers = parentObject->findChildren<Importer *>();
Chris@0 687 std::cerr << "have " << importers.size() << " importers" << std::endl;
Chris@0 688
Chris@0 689 ComposerMap composers;
Chris@0 690
Chris@0 691 QList<Composer *> dated;
Chris@0 692 QList<Composer *> undated;
Chris@0 693
Chris@0 694 QList<Work *> works;
Chris@0 695 QList<Composition *> compositions;
Chris@0 696 QList<QObject *> other;
Chris@0 697
Chris@0 698 foreach (Importer *importer, importers) {
Chris@0 699 QObjectList objects = importer->getImportedObjects();
Chris@0 700 foreach (QObject *o, objects) {
Chris@0 701 Composer *c;
Chris@0 702 if ((c = qobject_cast<Composer *>(o))) {
Chris@0 703 addMiscExpansions(c);
Chris@0 704 asciify(c);
Chris@0 705 if (c->birth() || c->death()) dated.push_back(c);
Chris@0 706 else undated.push_back(c);
Chris@0 707 continue;
Chris@0 708 }
Chris@0 709 Work *w;
Chris@0 710 if ((w = qobject_cast<Work *>(o))) {
Chris@0 711 asciify(w);
Chris@0 712 works.push_back(w);
Chris@0 713 continue;
Chris@0 714 }
Chris@0 715 Composition *cn;
Chris@0 716 if ((cn = qobject_cast<Composition *>(o))) {
Chris@0 717 compositions.push_back(cn);
Chris@0 718 continue;
Chris@0 719 }
Chris@0 720 }
Chris@0 721 }
Chris@0 722
Chris@0 723 // get all the dated composers merged before attempting to match
Chris@0 724 // the undated ones
Chris@0 725 foreach (Composer *c, dated) {
Chris@0 726 mergeComposer(c, composers);
Chris@0 727 }
Chris@0 728 foreach (Composer *c, undated) {
Chris@0 729 mergeComposer(c, composers);
Chris@0 730 }
Chris@0 731
Chris@0 732 QObjectList toStore;
Chris@0 733
Chris@0 734 QSet<Composer *> cset;
Chris@0 735 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
Chris@0 736 foreach (Composer *c, i.value()) {
Chris@0 737 if (!cset.contains(c)) {
Chris@0 738 assignUri(outstore, c);
Chris@0 739 toStore.push_back(c);
Chris@0 740 cset.insert(c);
Chris@0 741 }
Chris@0 742 foreach (Document *d, c->pages()) {
Chris@0 743 QString s = d->uri().toString();
Chris@0 744 addDbpediaResource(outstore, c, s);
Chris@0 745 }
Chris@0 746 }
Chris@0 747 }
Chris@0 748
Chris@0 749 QSet<QString> storedUris;
Chris@0 750
Chris@0 751 foreach (Work *w, works) {
Chris@0 752 Composition *cn = w->composition();
Chris@0 753 if (!cn) continue;
Chris@0 754 if (!cn->composer()) {
Chris@0 755 QString cname = cn->composerName();
Chris@0 756 if (cname != "") {
Chris@0 757 if (!composers.contains(cname.toLower())) {
Chris@0 758 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 759 } else {
Chris@0 760 QSet<Composer *> cs = composers[cname.toLower()];
Chris@0 761 if (cs.empty()) {
Chris@0 762 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 763 } else if (cs.size() > 1) {
Chris@0 764 DEBUG << "Failed to assign Composition to composer: "
Chris@0 765 << cs.size() << " composers match name " << cname << endl;
Chris@0 766 } else {
Chris@0 767 cn->setComposer(*cs.begin());
Chris@0 768 }
Chris@0 769 }
Chris@0 770 } else {
Chris@0 771 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
Chris@0 772 }
Chris@0 773 }
Chris@0 774
Chris@0 775 if (cn->composer()) {
Chris@0 776 assignUri(outstore, w, cn->composer());
Chris@0 777 }
Chris@0 778
Chris@0 779 foreach (Document *d, w->pages()) {
Chris@0 780 QString s = d->uri().toString();
Chris@0 781 addDbpediaResource(outstore, w, s);
Chris@1 782 if (!storedUris.contains(s)) {
Chris@1 783 toStore.push_back(d);
Chris@1 784 storedUris.insert(s);
Chris@1 785 }
Chris@0 786 }
Chris@0 787
Chris@0 788 QString u = w->property("uri").toUrl().toString();
Chris@0 789 if (u == "" || !storedUris.contains(u)) {
Chris@0 790 toStore.push_back(w);
Chris@0 791 if (u != "") storedUris.insert(u);
Chris@0 792 }
Chris@0 793 }
Chris@0 794
Chris@0 795 try {
Chris@0 796 outmapper.storeAllObjects(toStore);
Chris@0 797
Chris@0 798 } catch (RDFException e) {
Chris@0 799 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
Chris@0 800 }
Chris@0 801
Chris@0 802 DEBUG << "Stored, now saving" << endl;
Chris@0 803
Chris@2 804 outstore->save("imported.ttl");
Chris@0 805
Chris@0 806 DEBUG << "Saved" << endl;
Chris@0 807
Chris@0 808
Chris@0 809 QMultiMap<QString, Composer *> cmap;
Chris@0 810 foreach (Composer *c, cset) {
Chris@0 811 QString n = c->getSortName(true);
Chris@0 812 cmap.insert(n, c);
Chris@0 813 }
Chris@0 814
Chris@0 815 std::cout << "Composers: " << cmap.size() << std::endl;
Chris@0 816
Chris@0 817 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 818 i != cmap.end(); ++i) {
Chris@0 819
Chris@0 820 QString n = i.key();
Chris@0 821 Composer *c = i.value();
Chris@0 822
Chris@0 823 std::cout << n.toStdString();
Chris@0 824
Chris@0 825 QString d = c->getDisplayDates();
Chris@0 826 if (d != "") std::cout << " (" << d.toStdString() << ")";
Chris@0 827 std::cout << std::endl;
Chris@0 828 }
Chris@0 829
Chris@0 830 std::cout << std::endl;
Chris@0 831
Chris@0 832 std::cout << "Works by composer:" << std::endl;
Chris@0 833
Chris@0 834 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 835 i != cmap.end(); ++i) {
Chris@0 836
Chris@0 837 QString n = i.key();
Chris@0 838 Composer *c = i.value();
Chris@0 839
Chris@0 840 std::set<Work *, Work::Ordering> wmap;
Chris@0 841 foreach (Work *w, works) {
Chris@0 842 Composition *cn = w->composition();
Chris@0 843 if (!cn) continue;
Chris@0 844 if (cn->composer() != c) continue;
Chris@0 845 if (w->partOf()) continue;
Chris@0 846 wmap.insert(w);
Chris@0 847 }
Chris@0 848
Chris@0 849 if (wmap.empty()) continue;
Chris@0 850
Chris@0 851 std::cout << n.toStdString() << std::endl;
Chris@0 852
Chris@0 853 foreach (Work *w, wmap) {
Chris@0 854 std::cout << " * ";
Chris@0 855 std::cout << w->name().toStdString();
Chris@0 856 if (w->catalogue() != "") {
Chris@0 857 std::cout << " [" << w->catalogue().toStdString() << "]";
Chris@0 858 }
Chris@0 859 if (w->opus() != "") {
Chris@0 860 std::cout << " [op. " << w->opus().toStdString() << "]";
Chris@0 861 }
Chris@0 862 std::cout << std::endl;
Chris@0 863 std::set<Work *, Work::Ordering> orderedParts;
Chris@0 864 foreach (Work *ww, w->parts()) {
Chris@0 865 orderedParts.insert(ww);
Chris@0 866 }
Chris@0 867 foreach (Work *ww, orderedParts) {
Chris@0 868 std::cout << " ";
Chris@0 869 if (ww->number() != "") {
Chris@0 870 std::cout << ww->number().toStdString() << ". ";
Chris@0 871 }
Chris@0 872 std::cout << ww->name().toStdString();
Chris@0 873 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
Chris@0 874 std::cout << " [" << ww->catalogue().toStdString() << "]";
Chris@0 875 }
Chris@0 876 if (ww->opus() != "" && ww->opus() != w->opus()) {
Chris@0 877 std::cout << " [op. " << ww->opus().toStdString() << "]";
Chris@0 878 }
Chris@0 879 std::cout << std::endl;
Chris@0 880 }
Chris@0 881 }
Chris@0 882
Chris@0 883 std::cout << std::endl;
Chris@0 884 }
Chris@0 885
Chris@0 886 delete outstore;
Chris@0 887
Chris@0 888 DEBUG << "Done" << endl;
Chris@0 889
Chris@0 890
Chris@0 891 }
Chris@0 892
Chris@0 893