annotate import/Import.cpp @ 7:df999875c53b classical-rdf

* Test application for load/query (beginnings of)
author Chris Cannam
date Tue, 09 Feb 2010 17:33:39 +0000
parents 96bf272e74c5
children d35e5d769c87
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 #include "Objects.h"
Chris@0 4
Chris@0 5 #include <dataquay/BasicStore.h>
Chris@0 6 #include <dataquay/RDFException.h>
Chris@0 7 #include <dataquay/objectmapper/ObjectMapper.h>
Chris@0 8 #include <dataquay/objectmapper/ObjectBuilder.h>
Chris@0 9 #include <dataquay/objectmapper/ContainerBuilder.h>
Chris@0 10
Chris@0 11 #include "ImportClassicalComposersOrg.h"
Chris@0 12 #include "ImportClassicalDotNet.h"
Chris@4 13 #include "ImportClassicalArchives.h"
Chris@0 14 #include "ImportWikipediaComposers.h"
Chris@0 15 #include "ImportWikipediaWorks.h"
Chris@0 16 #include "ImportWikipediaWorksK.h"
Chris@0 17 #include "ImportWikipediaWorksList.h"
Chris@0 18 #include "ImportHoboken.h"
Chris@0 19
Chris@7 20 #include "TypeRegistrar.h"
Chris@7 21
Chris@0 22 #include <dataquay/Debug.h>
Chris@0 23
Chris@0 24 using namespace ClassicalData;
Chris@0 25 using namespace Dataquay;
Chris@0 26
Chris@0 27 #include <iostream>
Chris@0 28 #include <set>
Chris@0 29
// Lookup index of the composers seen so far.  mergeComposer() files each
// composer under makeNameKey() of its name, of each of its aliases and,
// when it has a birth date, of its "birth-death" year string.  Several
// composers may therefore share a key, and one composer usually appears
// under several keys.
typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers
Chris@0 31
Chris@0 32 void
Chris@0 33 addMiscExpansions(Composer *c)
Chris@0 34 {
Chris@0 35 QString n = c->name();
Chris@0 36
Chris@0 37 DEBUG << "addMiscExpansions: n = " << n << endl;
Chris@0 38
Chris@0 39 // lovely hard-coded special cases go here! some of these are
Chris@0 40 // needed for works->composer assignments
Chris@0 41 if (n == "Balakirev, Milii") {
Chris@0 42 c->addAlias("Mily Balakirev");
Chris@0 43 }
Chris@0 44 if (n.startsWith("Cui, C")) {
Chris@0 45 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
Chris@0 46 }
Chris@0 47 if (n == "Handel, George Frideric") {
Chris@0 48 c->addAlias("Handel, Georg Friedrich");
Chris@0 49 c->addAlias("Handel");
Chris@0 50 }
Chris@1 51 if (n == "Prokofiev, Sergey") {
Chris@1 52 c->addAlias("Prokofieff, Sergei");
Chris@1 53 c->addAlias("Sergei Prokofieff");
Chris@1 54 }
Chris@1 55 if (n == "Rossini, Gioacchino") {
Chris@1 56 c->addAlias("Rossini, Gioachino");
Chris@1 57 c->addAlias("Gioachino Rossini");
Chris@1 58 }
Chris@1 59 if (n == "Edwards, Richard") {
Chris@1 60 c->addAlias("Edwardes, Richard");
Chris@1 61 c->addAlias("Richard Edwardes");
Chris@1 62 c->addAlias("Richard Edwards");
Chris@1 63 }
Chris@1 64 if (n == "Rimsky-Korsakov, Nikolay Andreyevich") {
Chris@1 65 c->addAlias("Nikolai Rimsky-Korsakov");
Chris@1 66 }
Chris@1 67 if (n.startsWith("Piccinni, Nico")) {
Chris@1 68 c->addAlias(n);
Chris@1 69 c->setName(QString::fromUtf8("Piccinni, Niccol\303\262"));
Chris@1 70 }
Chris@1 71 if (n == "Tchaikovsky, Pyotr Ilyich") {
Chris@1 72 c->addAlias("Tchaikovsky, Piotr Ilyitch");
Chris@1 73 }
Chris@1 74 if (n == "Wilhelm Stenhammar") {
Chris@1 75 c->addAlias("Stenhammar, Vilhelm Eugene");
Chris@1 76 c->setName("Stenhammar, Wilhelm");
Chris@1 77 c->addAlias(n);
Chris@1 78 }
Chris@1 79 if (n == "Mercadante, Saverio Rafaele") {
Chris@1 80 c->addAlias("Mercadante, Giuseppe");
Chris@1 81 }
Chris@1 82 if (n == "Johann Wenzel Anton Stamitz") {
Chris@1 83 c->addAlias(n);
Chris@1 84 c->setName("Stamitz, Johann Wenzel Anton");
Chris@1 85 c->addAlias("Stamitz, Jan Vaclav");
Chris@1 86 }
Chris@1 87 if (n == "Mario Castelnuovo-Tedesco") {
Chris@1 88 c->addAlias("Castelnuovo Tedesco, Mario");
Chris@1 89 }
Chris@0 90 if (n == "Mayr, Simon") {
Chris@0 91 c->addAlias("Mayr");
Chris@0 92 }
Chris@0 93
Chris@0 94 n.replace(", Sr.", " Sr.");
Chris@0 95 n.replace(", Jr.", " Jr.");
Chris@0 96
Chris@0 97 int comma = n.indexOf(", ");
Chris@0 98 if (comma > 0 && comma + 2 < n.length()) {
Chris@0 99
Chris@0 100 QString left = n.left(comma);
Chris@0 101 QString right = n.right(n.length() - comma - 2);
Chris@0 102
Chris@0 103 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
Chris@0 104 if (jrsr.indexIn(right) >= 0) {
Chris@0 105 left = left + jrsr.cap(1);
Chris@0 106 right = right.left(right.length()-jrsr.matchedLength());
Chris@0 107 }
Chris@0 108 n = right + " " + left;
Chris@0 109 }
Chris@0 110
Chris@0 111 if (n != c->name()) c->addAlias(n);
Chris@0 112
Chris@0 113 if (n.contains("Sergey")) {
Chris@0 114 QString nn(n);
Chris@0 115 nn.replace("Sergey", "Sergei");
Chris@0 116 c->addAlias(nn);
Chris@1 117 } else if (n.contains("Sergei")) {
Chris@1 118 QString nn(n);
Chris@1 119 nn.replace("Sergei", "Sergey");
Chris@1 120 c->addAlias(nn);
Chris@0 121 }
Chris@0 122
Chris@0 123 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
Chris@0 124 if (sr.indexIn(n) >= 0) {
Chris@0 125 QString nr = n;
Chris@0 126 nr.replace(sr.pos(0), sr.matchedLength(), " I");
Chris@0 127 nr.replace(" ", " ");
Chris@0 128 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 129 c->addAlias(nr);
Chris@0 130 }
Chris@0 131 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
Chris@0 132 if (jr.indexIn(n) >= 0) {
Chris@0 133 QString nr = n;
Chris@0 134 nr.replace(jr.pos(0), jr.matchedLength(), " II");
Chris@0 135 nr.replace(" ", " ");
Chris@0 136 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 137 c->addAlias(nr);
Chris@0 138 }
Chris@0 139 QString nr = n;
Chris@0 140 nr.replace("(I)", "I");
Chris@0 141 nr.replace("(II)", "II");
Chris@0 142 nr.replace("(III)", "III");
Chris@0 143 c->addAlias(nr);
Chris@0 144 }
Chris@0 145
Chris@5 146 QString makeNameKey(QString name)
Chris@5 147 {
Chris@5 148 QString key = name.toLower()
Chris@5 149 .replace("'", "")
Chris@5 150 .replace("x", "ks")
Chris@5 151 .replace("y", "i")
Chris@5 152 .replace("k", "c")
Chris@5 153 .replace("ch", "c")
Chris@5 154 .replace("cc", "c")
Chris@5 155 .replace("v", "f")
Chris@5 156 .replace("ff", "f")
Chris@5 157 .replace("th", "t")
Chris@5 158 .replace("tch", "ch")
Chris@5 159 .replace("er", "r");
Chris@5 160 // DEBUG << "makeNameKey(" << name << "): " << key << endl;
Chris@5 161 return key;
Chris@5 162 }
Chris@5 163
Chris@0 164 bool namesFuzzyMatch(QString an, Composer *b)
Chris@0 165 {
Chris@0 166 // ew!
Chris@0 167
Chris@0 168 QString bn = b->name();
Chris@0 169 if (bn == an) return true;
Chris@0 170 if (b->aliases().contains(an)) return true;
Chris@0 171 int aSurnameIndex = 0, bSurnameIndex = 0;
Chris@0 172 if (an.contains(",")) {
Chris@0 173 an.replace(",", "");
Chris@0 174 } else {
Chris@0 175 aSurnameIndex = -1;
Chris@0 176 }
Chris@0 177 if (bn.contains(",")) {
Chris@0 178 bn.replace(",", "");
Chris@0 179 } else {
Chris@0 180 bSurnameIndex = -1;
Chris@0 181 }
Chris@0 182 QStringList nl = an.split(QRegExp("[ -]"));
Chris@5 183 QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]"));
Chris@0 184 int matchCount = 0;
Chris@0 185 QString surnameMatch = "";
Chris@0 186 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
Chris@0 187 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
Chris@0 188 if (nl[aSurnameIndex][0].isUpper() &&
Chris@0 189 nl[aSurnameIndex] != "Della" &&
Chris@5 190 makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
Chris@0 191 surnameMatch = nl[aSurnameIndex];
Chris@0 192 }
Chris@5 193 int tested = 0;
Chris@0 194 foreach (QString elt, nl) {
Chris@0 195 if (!elt[0].isUpper() || elt == "Della") continue;
Chris@5 196 QString k = makeNameKey(elt);
Chris@5 197 if (bnl.contains(k)) {
Chris@0 198 ++matchCount;
Chris@5 199 }
Chris@5 200 if (++tested == 2 && matchCount == 0) {
Chris@5 201 return false;
Chris@0 202 }
Chris@0 203 }
Chris@5 204 if (surnameMatch != "") {
Chris@0 205 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
Chris@5 206 if (matchCount > 1) {
Chris@5 207 return true;
Chris@5 208 } else {
Chris@5 209 DEBUG << "(but not enough else matched)" << endl;
Chris@5 210 return false;
Chris@5 211 }
Chris@0 212 }
Chris@0 213 return false;
Chris@0 214 }
Chris@0 215
Chris@0 216 bool
Chris@0 217 hasBetterName(Composer *c, Composer *other)
Chris@0 218 {
Chris@0 219 if (c->name() == other->name()) return false;
Chris@0 220
Chris@0 221 // Try to guess which of c and other is more likely to have a good
Chris@0 222 // "canonical form" of the composer's name
Chris@0 223
Chris@0 224 if (c->name().startsWith("van ")) {
Chris@0 225 return false; // wrong choice of sort for e.g. LvB; should be
Chris@0 226 // Beethoven, Ludwig van, not van Beethoven, Ludwig
Chris@0 227 }
Chris@0 228 if (other->name().startsWith("van ")) {
Chris@0 229 return true;
Chris@0 230 }
Chris@0 231
Chris@0 232 if (c->aliases().size() != other->aliases().size()) {
Chris@0 233 // a rather weak heuristic
Chris@0 234 return c->aliases().size() > other->aliases().size();
Chris@0 235 }
Chris@0 236
Chris@0 237 if (c->name().contains(',') && !other->name().contains(',')) {
Chris@0 238 // another rather weak heuristic
Chris@0 239 return true;
Chris@0 240 }
Chris@0 241
Chris@0 242 return false;
Chris@0 243 }
Chris@0 244
Chris@0 245 void mergeComposer(Composer *c, ComposerMap &composers)
Chris@0 246 {
Chris@0 247 QString name = c->name();
Chris@0 248
Chris@0 249 QSet<QString> allNames = c->aliases();
Chris@0 250 allNames.insert(name);
Chris@0 251
Chris@0 252 QString dates;
Chris@0 253 if (c->birth()) {
Chris@0 254 if (c->death()) {
Chris@0 255 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
Chris@0 256 } else {
Chris@0 257 dates = QString("%1-").arg(c->birth()->year());
Chris@0 258 }
Chris@0 259 }
Chris@0 260 if (dates != "") {
Chris@0 261 allNames.insert(dates);
Chris@0 262 }
Chris@0 263
Chris@0 264 QSet<Composer *> matches;
Chris@0 265
Chris@0 266 foreach (QString candidateName, allNames) {
Chris@5 267 QString key = makeNameKey(candidateName);
Chris@0 268 if (composers.contains(key)) {
Chris@0 269 foreach (Composer *candidate, composers[key]) {
Chris@0 270 if (candidateName == dates) {
Chris@5 271 if (c->name() == candidate->name()) {
Chris@5 272 DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl;
Chris@5 273 } else if (!namesFuzzyMatch(c->name(), candidate) &&
Chris@5 274 !namesFuzzyMatch(candidate->name(), c)) {
Chris@0 275 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
Chris@0 276 continue;
Chris@0 277 } else {
Chris@0 278 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
Chris@0 279 }
Chris@0 280 } else {
Chris@1 281 if (!c->datesMatch(candidate)) {
Chris@0 282 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
Chris@0 283 continue;
Chris@0 284 }
Chris@0 285 }
Chris@0 286 matches.insert(candidate);
Chris@0 287 }
Chris@0 288 }
Chris@0 289 }
Chris@0 290
Chris@0 291 if (matches.empty()) {
Chris@0 292 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
Chris@0 293
Chris@0 294 if (!c->birth() && !c->death()) {
Chris@5 295 DEBUG << "Composer has no dates, laboriously searching for all names" << endl;
Chris@0 296 // laboriously look for fuzzy match across _all_ composers
Chris@0 297 for (ComposerMap::iterator i = composers.begin();
Chris@0 298 i != composers.end(); ++i) {
Chris@0 299 foreach (Composer *candidate, *i) {
Chris@0 300 if (namesFuzzyMatch(c->name(), candidate)) {
Chris@0 301 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
Chris@0 302 matches.insert(candidate);
Chris@0 303 break;
Chris@0 304 }
Chris@0 305 }
Chris@0 306 if (!matches.empty()) break;
Chris@0 307 }
Chris@0 308 }
Chris@0 309
Chris@0 310 if (matches.empty()) {
Chris@0 311 foreach (QString candidateName, allNames) {
Chris@5 312 QString key = makeNameKey(candidateName);
Chris@5 313 composers[key].insert(c);
Chris@0 314 DEBUG << "added for alias or date " << candidateName << endl;
Chris@0 315 }
Chris@0 316 return;
Chris@0 317 }
Chris@0 318 }
Chris@0 319
Chris@0 320 if (matches.size() > 1) {
Chris@0 321 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
Chris@0 322 }
Chris@0 323
Chris@0 324 Composer *other = *matches.begin();
Chris@0 325
Chris@0 326 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
Chris@0 327
Chris@0 328 if (hasBetterName(c, other)) {
Chris@0 329 other->addAlias(other->name());
Chris@0 330 other->setName(c->name());
Chris@0 331 } else {
Chris@0 332 other->addAlias(c->name());
Chris@0 333 }
Chris@5 334 composers[makeNameKey(c->name())].insert(other);
Chris@0 335 DEBUG << "linking from alias " << c->name() << endl;
Chris@0 336
Chris@0 337 foreach (QString alias, c->aliases()) {
Chris@0 338 if (alias != other->name() &&
Chris@0 339 !other->aliases().contains(alias)) {
Chris@0 340 other->addAlias(alias);
Chris@5 341 composers[makeNameKey(alias)].insert(other);
Chris@0 342 DEBUG << "linking from alias " << alias << endl;
Chris@0 343 }
Chris@0 344 }
Chris@0 345
Chris@0 346 foreach (Document *d, c->pages()) {
Chris@0 347 bool found = false;
Chris@0 348 foreach (Document *dd, other->pages()) {
Chris@0 349 if (d->uri() == dd->uri()) {
Chris@0 350 found = true;
Chris@0 351 break;
Chris@0 352 }
Chris@0 353 }
Chris@0 354 if (!found) {
Chris@0 355 d->setTopic(other);
Chris@0 356 other->addPage(d);
Chris@0 357 }
Chris@0 358 }
Chris@0 359
Chris@0 360 //!!! actually the "approximate" bits of the following are bogus;
Chris@0 361 // a source reporting birth or death date as approx is probably
Chris@0 362 // more accurate than one reporting an exact date
Chris@0 363
Chris@0 364 if (c->birth()) {
Chris@0 365 if (!other->birth() || other->birth()->approximate()) {
Chris@0 366 other->setBirth(c->birth());
Chris@0 367 }
Chris@0 368 }
Chris@0 369
Chris@0 370 if (c->death()) {
Chris@0 371 if (!other->death() || other->death()->approximate()) {
Chris@0 372 other->setDeath(c->death());
Chris@0 373 }
Chris@0 374 }
Chris@0 375
Chris@0 376 if (c->gender() != "") other->setGender(c->gender());
Chris@4 377
Chris@4 378 foreach (QString s, c->nationality()) {
Chris@4 379 other->addNationality(s);
Chris@4 380 }
Chris@4 381
Chris@4 382 foreach (QUrl s, c->geonameURIs()) {
Chris@4 383 other->addGeonameURI(s);
Chris@4 384 }
Chris@4 385
Chris@0 386 if (c->remarks() != "") other->setRemarks(c->remarks());
Chris@0 387 if (c->period() != "") other->setPeriod(c->period());
Chris@0 388
Chris@0 389 }
Chris@0 390
Chris@0 391 QString
Chris@0 392 asciify(QString field)
Chris@0 393 {
Chris@0 394 // accented characters etc -- add "ascii version" for dumb search purposes
Chris@0 395 QString ascii;
Chris@0 396 for (int i = 0; i < field.length(); ++i) {
Chris@0 397 QString dc = field[i].decomposition();
Chris@0 398 if (dc != "") ascii += dc[0];
Chris@0 399 else if (field[i] == QChar(0x00DF)) {
Chris@0 400 ascii += "ss";
Chris@0 401 } else {
Chris@0 402 ascii += field[i];
Chris@0 403 }
Chris@0 404 }
Chris@0 405 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
Chris@0 406 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
Chris@0 407 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
Chris@0 408 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
Chris@0 409 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
Chris@0 410 return ascii;
Chris@0 411 }
Chris@0 412
Chris@0 413 void
Chris@0 414 asciify(Composer *c)
Chris@0 415 {
Chris@0 416 QString n = c->name();
Chris@0 417 QString asc = asciify(n);
Chris@0 418 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 419 foreach (QString alias, c->aliases()) {
Chris@0 420 asc = asciify(alias);
Chris@0 421 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 422 }
Chris@0 423 }
Chris@0 424
Chris@0 425 void
Chris@0 426 asciify(Work *w)
Chris@0 427 {
Chris@0 428 QString n = w->name();
Chris@0 429 QString asc = asciify(n);
Chris@0 430 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 431 foreach (QString alias, w->aliases()) {
Chris@0 432 asc = asciify(alias);
Chris@0 433 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 434 }
Chris@0 435 }
Chris@0 436
Chris@0 437 void
Chris@0 438 assignUri(Store *s, Composer *c)
Chris@0 439 {
Chris@0 440 static QSet<QString> convSet;
Chris@0 441 QString conv = c->name();
Chris@0 442 if (!conv.contains(",")) {
Chris@0 443 QStringList sl = conv.split(" ");
Chris@0 444 if (!sl.empty()) {
Chris@0 445 sl.push_front(sl[sl.size()-1]);
Chris@0 446 sl.removeLast();
Chris@0 447 conv = sl.join(" ");
Chris@0 448 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
Chris@0 449 }
Chris@0 450 }
Chris@0 451 conv = asciify(conv);
Chris@0 452 conv.replace(" ", "_");
Chris@0 453 conv.replace("-", "_");
Chris@0 454 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@0 455 conv = conv.toLower();
Chris@0 456 QString initial = conv;
Chris@1 457 int i = 2;
Chris@0 458 while (convSet.contains(conv)) {
Chris@0 459 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 460 i++;
Chris@0 461 }
Chris@0 462 convSet.insert(conv);
Chris@1 463 c->setProperty("uri", s->expand(":composer/" + conv));
Chris@0 464 }
Chris@0 465
Chris@0 466 void
Chris@0 467 assignUri(Store *s, Work *w, Composer *c)
Chris@0 468 {
Chris@0 469 QString pfx = c->property("uri").toUrl().toString();
Chris@0 470 DEBUG << "pfx = " << pfx << endl;
Chris@2 471 if (!pfx.contains("composer/")) pfx = ":work/";
Chris@2 472 else {
Chris@2 473 pfx.replace("composer/", "work/");
Chris@2 474 pfx += "/";
Chris@2 475 }
Chris@0 476
Chris@0 477 static QSet<QString> convSet;
Chris@1 478
Chris@0 479 QString conv = w->catalogue();
Chris@0 480 if (conv == "") conv = w->opus();
Chris@0 481 conv = conv.replace(".", "");
Chris@0 482 bool hasOpus = (conv != "");
Chris@1 483 if (conv == "") conv = w->name().toLower();
Chris@0 484 if (w->number() != "") conv = conv + "_no" + w->number();
Chris@0 485 conv = asciify(conv);
Chris@0 486 conv.replace(" ", "_");
Chris@0 487 conv.replace("-", "_");
Chris@0 488 conv.replace(":", "_");
Chris@0 489 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@1 490
Chris@2 491 if (pfx != "") conv = pfx + conv;
Chris@1 492
Chris@0 493 // I think actually for works we want to merge duplicates rather than
Chris@0 494 // assign them separate URIs, _unless_ they lack a viable opus number
Chris@0 495 if (!hasOpus) {
Chris@0 496 QString initial = conv;
Chris@1 497 int i = 2;
Chris@0 498 while (convSet.contains(conv)) {
Chris@0 499 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 500 i++;
Chris@0 501 }
Chris@0 502 }
Chris@0 503 convSet.insert(conv);
Chris@1 504
Chris@1 505 w->setProperty("uri", conv);
Chris@0 506 }
Chris@0 507
Chris@0 508 void
Chris@0 509 addDbpediaResource(Store *store, QObject *o, QString s)
Chris@0 510 {
Chris@0 511 QUrl u = o->property("uri").toUrl();
Chris@0 512 if (u == QUrl()) return;
Chris@0 513 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
Chris@0 514 store->add(Triple(u,
Chris@0 515 "mo:wikipedia",
Chris@0 516 QUrl(s)));
Chris@0 517 s.replace("http://en.wikipedia.org/wiki/",
Chris@0 518 "http://dbpedia.org/resource/");
Chris@0 519 store->add(Triple(u,
Chris@0 520 "owl:sameAs",
Chris@0 521 QUrl(s)));
Chris@0 522 }
Chris@0 523 }
Chris@0 524
Chris@0 525 int main(int argc, char **argv)
Chris@0 526 {
Chris@0 527 qRegisterMetaType<ClassicalComposersOrgImporter *>
Chris@0 528 ("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 529 qRegisterMetaType<ClassicalDotNetImporter *>
Chris@0 530 ("ClassicalData::ClassicalDotNetImporter*");
Chris@4 531 qRegisterMetaType<ClassicalArchivesImporter *>
Chris@4 532 ("ClassicalData::ClassicalArchivesImporter*");
Chris@0 533 qRegisterMetaType<WikipediaComposersImporter *>
Chris@0 534 ("ClassicalData::WikipediaComposersImporter*");
Chris@0 535 qRegisterMetaType<WikipediaWorksImporter *>
Chris@0 536 ("ClassicalData::WikipediaWorksImporter*");
Chris@0 537 qRegisterMetaType<WikipediaWorksKImporter *>
Chris@0 538 ("ClassicalData::WikipediaWorksKImporter*");
Chris@0 539 qRegisterMetaType<WikipediaWorksListImporter *>
Chris@0 540 ("ClassicalData::WikipediaWorksListImporter*");
Chris@0 541 qRegisterMetaType<HobokenImporter *>
Chris@0 542 ("ClassicalData::HobokenImporter*");
Chris@0 543
Chris@0 544 ObjectBuilder::getInstance()->registerClass
Chris@0 545 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 546 ObjectBuilder::getInstance()->registerClass
Chris@0 547 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
Chris@0 548 ObjectBuilder::getInstance()->registerClass
Chris@4 549 <ClassicalArchivesImporter>("ClassicalData::ClassicalArchivesImporter*");
Chris@4 550 ObjectBuilder::getInstance()->registerClass
Chris@0 551 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
Chris@0 552 ObjectBuilder::getInstance()->registerClass
Chris@0 553 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
Chris@0 554 ObjectBuilder::getInstance()->registerClass
Chris@0 555 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
Chris@0 556 ObjectBuilder::getInstance()->registerClass
Chris@0 557 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
Chris@0 558 ObjectBuilder::getInstance()->registerClass
Chris@0 559 <HobokenImporter>("ClassicalData::HobokenImporter*");
Chris@0 560
Chris@0 561 BasicStore *store = BasicStore::load("file:importers.ttl");
Chris@0 562 ObjectMapper mapper(store);
Chris@0 563 QObject *parentObject = mapper.loadAllObjects(new QObject());
Chris@0 564
Chris@0 565 BasicStore *outstore = new BasicStore();
Chris@1 566 outstore->setBaseUri("http://dbtune.org/classical/resource/");
Chris@0 567 ObjectMapper outmapper(outstore);
Chris@0 568
Chris@7 569 TypeRegistrar::addMappings(outstore, &outmapper);
Chris@7 570
Chris@0 571 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
Chris@3 572 outmapper.setObjectStorePolicy(ObjectMapper::StoreAllObjects);
Chris@3 573 outmapper.setBlankNodePolicy(ObjectMapper::NoBlankNodes);
Chris@1 574
Chris@0 575 QList<Importer *> importers = parentObject->findChildren<Importer *>();
Chris@0 576 std::cerr << "have " << importers.size() << " importers" << std::endl;
Chris@0 577
Chris@0 578 ComposerMap composers;
Chris@0 579
Chris@0 580 QList<Composer *> dated;
Chris@0 581 QList<Composer *> undated;
Chris@0 582
Chris@0 583 QList<Work *> works;
Chris@0 584 QList<Composition *> compositions;
Chris@0 585 QList<QObject *> other;
Chris@0 586
Chris@0 587 foreach (Importer *importer, importers) {
Chris@0 588 QObjectList objects = importer->getImportedObjects();
Chris@0 589 foreach (QObject *o, objects) {
Chris@0 590 Composer *c;
Chris@0 591 if ((c = qobject_cast<Composer *>(o))) {
Chris@0 592 addMiscExpansions(c);
Chris@0 593 asciify(c);
Chris@0 594 if (c->birth() || c->death()) dated.push_back(c);
Chris@0 595 else undated.push_back(c);
Chris@0 596 continue;
Chris@0 597 }
Chris@0 598 Work *w;
Chris@0 599 if ((w = qobject_cast<Work *>(o))) {
Chris@0 600 asciify(w);
Chris@0 601 works.push_back(w);
Chris@0 602 continue;
Chris@0 603 }
Chris@0 604 Composition *cn;
Chris@0 605 if ((cn = qobject_cast<Composition *>(o))) {
Chris@0 606 compositions.push_back(cn);
Chris@0 607 continue;
Chris@0 608 }
Chris@0 609 }
Chris@0 610 }
Chris@0 611
Chris@0 612 // get all the dated composers merged before attempting to match
Chris@0 613 // the undated ones
Chris@0 614 foreach (Composer *c, dated) {
Chris@0 615 mergeComposer(c, composers);
Chris@0 616 }
Chris@0 617 foreach (Composer *c, undated) {
Chris@0 618 mergeComposer(c, composers);
Chris@0 619 }
Chris@0 620
Chris@0 621 QObjectList toStore;
Chris@0 622
Chris@0 623 QSet<Composer *> cset;
Chris@0 624 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
Chris@0 625 foreach (Composer *c, i.value()) {
Chris@0 626 if (!cset.contains(c)) {
Chris@0 627 assignUri(outstore, c);
Chris@0 628 toStore.push_back(c);
Chris@0 629 cset.insert(c);
Chris@0 630 }
Chris@0 631 foreach (Document *d, c->pages()) {
Chris@0 632 QString s = d->uri().toString();
Chris@0 633 addDbpediaResource(outstore, c, s);
Chris@0 634 }
Chris@0 635 }
Chris@0 636 }
Chris@0 637
Chris@0 638 QSet<QString> storedUris;
Chris@0 639
Chris@0 640 foreach (Work *w, works) {
Chris@0 641 Composition *cn = w->composition();
Chris@0 642 if (!cn) continue;
Chris@0 643 if (!cn->composer()) {
Chris@0 644 QString cname = cn->composerName();
Chris@5 645 QString key = makeNameKey(cname);
Chris@0 646 if (cname != "") {
Chris@5 647 if (!composers.contains(key)) {
Chris@0 648 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 649 } else {
Chris@5 650 QSet<Composer *> cs = composers[key];
Chris@0 651 if (cs.empty()) {
Chris@0 652 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 653 } else if (cs.size() > 1) {
Chris@0 654 DEBUG << "Failed to assign Composition to composer: "
Chris@0 655 << cs.size() << " composers match name " << cname << endl;
Chris@0 656 } else {
Chris@0 657 cn->setComposer(*cs.begin());
Chris@0 658 }
Chris@0 659 }
Chris@0 660 } else {
Chris@0 661 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
Chris@0 662 }
Chris@0 663 }
Chris@0 664
Chris@0 665 if (cn->composer()) {
Chris@0 666 assignUri(outstore, w, cn->composer());
Chris@0 667 }
Chris@0 668
Chris@0 669 foreach (Document *d, w->pages()) {
Chris@0 670 QString s = d->uri().toString();
Chris@0 671 addDbpediaResource(outstore, w, s);
Chris@1 672 if (!storedUris.contains(s)) {
Chris@1 673 toStore.push_back(d);
Chris@1 674 storedUris.insert(s);
Chris@1 675 }
Chris@0 676 }
Chris@0 677
Chris@0 678 QString u = w->property("uri").toUrl().toString();
Chris@0 679 if (u == "" || !storedUris.contains(u)) {
Chris@0 680 toStore.push_back(w);
Chris@0 681 if (u != "") storedUris.insert(u);
Chris@0 682 }
Chris@0 683 }
Chris@0 684
Chris@0 685 try {
Chris@0 686 outmapper.storeAllObjects(toStore);
Chris@0 687
Chris@0 688 } catch (RDFException e) {
Chris@0 689 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
Chris@0 690 }
Chris@0 691
Chris@0 692 DEBUG << "Stored, now saving" << endl;
Chris@0 693
Chris@2 694 outstore->save("imported.ttl");
Chris@0 695
Chris@0 696 DEBUG << "Saved" << endl;
Chris@0 697
Chris@0 698
Chris@0 699 QMultiMap<QString, Composer *> cmap;
Chris@0 700 foreach (Composer *c, cset) {
Chris@0 701 QString n = c->getSortName(true);
Chris@0 702 cmap.insert(n, c);
Chris@0 703 }
Chris@0 704
Chris@0 705 std::cout << "Composers: " << cmap.size() << std::endl;
Chris@0 706
Chris@0 707 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 708 i != cmap.end(); ++i) {
Chris@0 709
Chris@0 710 QString n = i.key();
Chris@0 711 Composer *c = i.value();
Chris@0 712
Chris@0 713 std::cout << n.toStdString();
Chris@0 714
Chris@0 715 QString d = c->getDisplayDates();
Chris@0 716 if (d != "") std::cout << " (" << d.toStdString() << ")";
Chris@0 717 std::cout << std::endl;
Chris@0 718 }
Chris@0 719
Chris@0 720 std::cout << std::endl;
Chris@0 721
Chris@0 722 std::cout << "Works by composer:" << std::endl;
Chris@0 723
Chris@0 724 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 725 i != cmap.end(); ++i) {
Chris@0 726
Chris@0 727 QString n = i.key();
Chris@0 728 Composer *c = i.value();
Chris@0 729
Chris@0 730 std::set<Work *, Work::Ordering> wmap;
Chris@0 731 foreach (Work *w, works) {
Chris@0 732 Composition *cn = w->composition();
Chris@0 733 if (!cn) continue;
Chris@0 734 if (cn->composer() != c) continue;
Chris@0 735 if (w->partOf()) continue;
Chris@0 736 wmap.insert(w);
Chris@0 737 }
Chris@0 738
Chris@0 739 if (wmap.empty()) continue;
Chris@0 740
Chris@0 741 std::cout << n.toStdString() << std::endl;
Chris@0 742
Chris@0 743 foreach (Work *w, wmap) {
Chris@0 744 std::cout << " * ";
Chris@0 745 std::cout << w->name().toStdString();
Chris@0 746 if (w->catalogue() != "") {
Chris@0 747 std::cout << " [" << w->catalogue().toStdString() << "]";
Chris@0 748 }
Chris@0 749 if (w->opus() != "") {
Chris@0 750 std::cout << " [op. " << w->opus().toStdString() << "]";
Chris@0 751 }
Chris@0 752 std::cout << std::endl;
Chris@0 753 std::set<Work *, Work::Ordering> orderedParts;
Chris@0 754 foreach (Work *ww, w->parts()) {
Chris@0 755 orderedParts.insert(ww);
Chris@0 756 }
Chris@0 757 foreach (Work *ww, orderedParts) {
Chris@0 758 std::cout << " ";
Chris@0 759 if (ww->number() != "") {
Chris@0 760 std::cout << ww->number().toStdString() << ". ";
Chris@0 761 }
Chris@0 762 std::cout << ww->name().toStdString();
Chris@0 763 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
Chris@0 764 std::cout << " [" << ww->catalogue().toStdString() << "]";
Chris@0 765 }
Chris@0 766 if (ww->opus() != "" && ww->opus() != w->opus()) {
Chris@0 767 std::cout << " [op. " << ww->opus().toStdString() << "]";
Chris@0 768 }
Chris@0 769 std::cout << std::endl;
Chris@0 770 }
Chris@0 771 }
Chris@0 772
Chris@0 773 std::cout << std::endl;
Chris@0 774 }
Chris@0 775
Chris@0 776 delete outstore;
Chris@0 777
Chris@0 778 DEBUG << "Done" << endl;
Chris@0 779
Chris@0 780
Chris@0 781 }
Chris@0 782
Chris@0 783