annotate import/Import.cpp @ 1:29ca5974905d classical-rdf

* More work on a nice tidy import; get some sensible URIs etc
author Chris Cannam
date Thu, 03 Dec 2009 15:42:10 +0000
parents import/Test.cpp@e8f4c2b55fd8
children ff067a1e7e3d
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 #include "Objects.h"
Chris@0 4
Chris@0 5 #include <dataquay/BasicStore.h>
Chris@0 6 #include <dataquay/RDFException.h>
Chris@0 7 #include <dataquay/objectmapper/ObjectMapper.h>
Chris@0 8 #include <dataquay/objectmapper/ObjectBuilder.h>
Chris@0 9 #include <dataquay/objectmapper/ContainerBuilder.h>
Chris@0 10
Chris@0 11 #include "ImportClassicalComposersOrg.h"
Chris@0 12 #include "ImportClassicalDotNet.h"
Chris@0 13 #include "ImportWikipediaComposers.h"
Chris@0 14 #include "ImportWikipediaWorks.h"
Chris@0 15 #include "ImportWikipediaWorksK.h"
Chris@0 16 #include "ImportWikipediaWorksList.h"
Chris@0 17 #include "ImportHoboken.h"
Chris@0 18
Chris@0 19 #include <dataquay/Debug.h>
Chris@0 20
Chris@0 21 using namespace ClassicalData;
Chris@0 22 using namespace Dataquay;
Chris@0 23
Chris@0 24 #include <iostream>
Chris@0 25 #include <set>
Chris@0 26
Chris@0 27 typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers
Chris@0 28
Chris@0 29 void
Chris@0 30 addMiscExpansions(Composer *c)
Chris@0 31 {
Chris@0 32 QString n = c->name();
Chris@0 33
Chris@0 34 DEBUG << "addMiscExpansions: n = " << n << endl;
Chris@0 35
Chris@0 36 // lovely hard-coded special cases go here! some of these are
Chris@0 37 // needed for works->composer assignments
Chris@0 38 if (n == "Balakirev, Milii") {
Chris@0 39 c->addAlias("Mily Balakirev");
Chris@0 40 }
Chris@0 41 if (n.startsWith("Cui, C")) {
Chris@0 42 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
Chris@0 43 }
Chris@0 44 if (n == "Handel, George Frideric") {
Chris@0 45 c->addAlias("Handel, Georg Friedrich");
Chris@0 46 c->addAlias("Handel");
Chris@0 47 }
Chris@1 48 if (n == "Prokofiev, Sergey") {
Chris@1 49 c->addAlias("Prokofieff, Sergei");
Chris@1 50 c->addAlias("Sergei Prokofieff");
Chris@1 51 }
Chris@1 52 if (n == "Rossini, Gioacchino") {
Chris@1 53 c->addAlias("Rossini, Gioachino");
Chris@1 54 c->addAlias("Gioachino Rossini");
Chris@1 55 }
Chris@1 56 if (n == "Edwards, Richard") {
Chris@1 57 c->addAlias("Edwardes, Richard");
Chris@1 58 c->addAlias("Richard Edwardes");
Chris@1 59 c->addAlias("Richard Edwards");
Chris@1 60 }
Chris@1 61 if (n == "Rimsky-Korsakov, Nikolay Andreyevich") {
Chris@1 62 c->addAlias("Nikolai Rimsky-Korsakov");
Chris@1 63 }
Chris@1 64 if (n.startsWith("Piccinni, Nico")) {
Chris@1 65 c->addAlias(n);
Chris@1 66 c->setName(QString::fromUtf8("Piccinni, Niccol\303\262"));
Chris@1 67 }
Chris@1 68 if (n == "Tchaikovsky, Pyotr Ilyich") {
Chris@1 69 c->addAlias("Tchaikovsky, Piotr Ilyitch");
Chris@1 70 }
Chris@1 71 if (n == "Wilhelm Stenhammar") {
Chris@1 72 c->addAlias("Stenhammar, Vilhelm Eugene");
Chris@1 73 c->setName("Stenhammar, Wilhelm");
Chris@1 74 c->addAlias(n);
Chris@1 75 }
Chris@1 76 if (n == "Mercadante, Saverio Rafaele") {
Chris@1 77 c->addAlias("Mercadante, Giuseppe");
Chris@1 78 }
Chris@1 79 if (n == "Johann Wenzel Anton Stamitz") {
Chris@1 80 c->addAlias(n);
Chris@1 81 c->setName("Stamitz, Johann Wenzel Anton");
Chris@1 82 c->addAlias("Stamitz, Jan Vaclav");
Chris@1 83 }
Chris@1 84 if (n == "Mario Castelnuovo-Tedesco") {
Chris@1 85 c->addAlias("Castelnuovo Tedesco, Mario");
Chris@1 86 }
Chris@0 87 if (n == "Mayr, Simon") {
Chris@0 88 c->addAlias("Mayr");
Chris@0 89 }
Chris@0 90
Chris@0 91 n.replace(", Sr.", " Sr.");
Chris@0 92 n.replace(", Jr.", " Jr.");
Chris@0 93
Chris@0 94 int comma = n.indexOf(", ");
Chris@0 95 if (comma > 0 && comma + 2 < n.length()) {
Chris@0 96
Chris@0 97 QString left = n.left(comma);
Chris@0 98 QString right = n.right(n.length() - comma - 2);
Chris@0 99
Chris@0 100 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
Chris@0 101 if (jrsr.indexIn(right) >= 0) {
Chris@0 102 left = left + jrsr.cap(1);
Chris@0 103 right = right.left(right.length()-jrsr.matchedLength());
Chris@0 104 }
Chris@0 105 n = right + " " + left;
Chris@0 106 }
Chris@0 107
Chris@0 108 if (n != c->name()) c->addAlias(n);
Chris@0 109
Chris@0 110 if (n.contains("Sergey")) {
Chris@0 111 QString nn(n);
Chris@0 112 nn.replace("Sergey", "Sergei");
Chris@0 113 c->addAlias(nn);
Chris@1 114 } else if (n.contains("Sergei")) {
Chris@1 115 QString nn(n);
Chris@1 116 nn.replace("Sergei", "Sergey");
Chris@1 117 c->addAlias(nn);
Chris@0 118 }
Chris@0 119
Chris@0 120 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
Chris@0 121 if (sr.indexIn(n) >= 0) {
Chris@0 122 QString nr = n;
Chris@0 123 nr.replace(sr.pos(0), sr.matchedLength(), " I");
Chris@0 124 nr.replace(" ", " ");
Chris@0 125 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 126 c->addAlias(nr);
Chris@0 127 }
Chris@0 128 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
Chris@0 129 if (jr.indexIn(n) >= 0) {
Chris@0 130 QString nr = n;
Chris@0 131 nr.replace(jr.pos(0), jr.matchedLength(), " II");
Chris@0 132 nr.replace(" ", " ");
Chris@0 133 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 134 c->addAlias(nr);
Chris@0 135 }
Chris@0 136 QString nr = n;
Chris@0 137 nr.replace("(I)", "I");
Chris@0 138 nr.replace("(II)", "II");
Chris@0 139 nr.replace("(III)", "III");
Chris@0 140 c->addAlias(nr);
Chris@0 141 }
Chris@0 142
Chris@0 143 bool namesFuzzyMatch(QString an, Composer *b)
Chris@0 144 {
Chris@0 145 // ew!
Chris@0 146
Chris@0 147 QString bn = b->name();
Chris@0 148 if (bn == an) return true;
Chris@0 149 if (b->aliases().contains(an)) return true;
Chris@0 150 int aSurnameIndex = 0, bSurnameIndex = 0;
Chris@0 151 if (an.contains(",")) {
Chris@0 152 an.replace(",", "");
Chris@0 153 } else {
Chris@0 154 aSurnameIndex = -1;
Chris@0 155 }
Chris@0 156 if (bn.contains(",")) {
Chris@0 157 bn.replace(",", "");
Chris@0 158 } else {
Chris@0 159 bSurnameIndex = -1;
Chris@0 160 }
Chris@0 161 QStringList nl = an.split(QRegExp("[ -]"));
Chris@0 162 QStringList bnl = bn.split(QRegExp("[ -]"));
Chris@0 163 int matchCount = 0;
Chris@0 164 QString surnameMatch = "";
Chris@0 165 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
Chris@0 166 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
Chris@0 167 if (nl[aSurnameIndex][0].isUpper() &&
Chris@0 168 nl[aSurnameIndex] != "Della" &&
Chris@0 169 nl[aSurnameIndex] == bnl[bSurnameIndex]) {
Chris@0 170 surnameMatch = nl[aSurnameIndex];
Chris@0 171 }
Chris@0 172 foreach (QString elt, nl) {
Chris@0 173 if (!elt[0].isUpper() || elt == "Della") continue;
Chris@0 174 if (bnl.contains(elt)) {
Chris@0 175 ++matchCount;
Chris@0 176 continue;
Chris@0 177 }
Chris@0 178 }
Chris@0 179 if (matchCount > 1 && surnameMatch != "") {
Chris@0 180 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
Chris@0 181 return true;
Chris@0 182 }
Chris@0 183 return false;
Chris@0 184 }
Chris@0 185
Chris@0 186 bool
Chris@0 187 hasBetterName(Composer *c, Composer *other)
Chris@0 188 {
Chris@0 189 if (c->name() == other->name()) return false;
Chris@0 190
Chris@0 191 // Try to guess which of c and other is more likely to have a good
Chris@0 192 // "canonical form" of the composer's name
Chris@0 193
Chris@0 194 if (c->name().startsWith("van ")) {
Chris@0 195 return false; // wrong choice of sort for e.g. LvB; should be
Chris@0 196 // Beethoven, Ludwig van, not van Beethoven, Ludwig
Chris@0 197 }
Chris@0 198 if (other->name().startsWith("van ")) {
Chris@0 199 return true;
Chris@0 200 }
Chris@0 201
Chris@0 202 if (c->aliases().size() != other->aliases().size()) {
Chris@0 203 // a rather weak heuristic
Chris@0 204 return c->aliases().size() > other->aliases().size();
Chris@0 205 }
Chris@0 206
Chris@0 207 if (c->name().contains(',') && !other->name().contains(',')) {
Chris@0 208 // another rather weak heuristic
Chris@0 209 return true;
Chris@0 210 }
Chris@0 211
Chris@0 212 return false;
Chris@0 213 }
Chris@0 214
Chris@0 215 void mergeComposer(Composer *c, ComposerMap &composers)
Chris@0 216 {
Chris@0 217 QString name = c->name();
Chris@0 218
Chris@0 219 QSet<QString> allNames = c->aliases();
Chris@0 220 allNames.insert(name);
Chris@0 221
Chris@0 222 QString dates;
Chris@0 223 if (c->birth()) {
Chris@0 224 if (c->death()) {
Chris@0 225 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
Chris@0 226 } else {
Chris@0 227 dates = QString("%1-").arg(c->birth()->year());
Chris@0 228 }
Chris@0 229 }
Chris@0 230 if (dates != "") {
Chris@0 231 allNames.insert(dates);
Chris@0 232 }
Chris@0 233
Chris@0 234 QSet<Composer *> matches;
Chris@0 235
Chris@0 236 foreach (QString candidateName, allNames) {
Chris@0 237 QString key = candidateName.toLower();
Chris@0 238 if (composers.contains(key)) {
Chris@0 239 foreach (Composer *candidate, composers[key]) {
Chris@0 240 if (candidateName == dates) {
Chris@0 241 if (!namesFuzzyMatch(c->name(), candidate) &&
Chris@0 242 !namesFuzzyMatch(candidate->name(), c)) {
Chris@0 243 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
Chris@0 244 continue;
Chris@0 245 } else {
Chris@0 246 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
Chris@0 247 }
Chris@0 248 } else {
Chris@1 249 if (!c->datesMatch(candidate)) {
Chris@0 250 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
Chris@0 251 continue;
Chris@0 252 }
Chris@0 253 }
Chris@0 254 matches.insert(candidate);
Chris@0 255 }
Chris@0 256 }
Chris@0 257 }
Chris@0 258
Chris@0 259 if (matches.empty()) {
Chris@0 260 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
Chris@0 261
Chris@0 262 if (!c->birth() && !c->death()) {
Chris@0 263 // laboriously look for fuzzy match across _all_ composers
Chris@0 264 for (ComposerMap::iterator i = composers.begin();
Chris@0 265 i != composers.end(); ++i) {
Chris@0 266 foreach (Composer *candidate, *i) {
Chris@0 267 if (namesFuzzyMatch(c->name(), candidate)) {
Chris@0 268 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
Chris@0 269 matches.insert(candidate);
Chris@0 270 break;
Chris@0 271 }
Chris@0 272 }
Chris@0 273 if (!matches.empty()) break;
Chris@0 274 }
Chris@0 275 }
Chris@0 276
Chris@0 277 if (matches.empty()) {
Chris@0 278 foreach (QString candidateName, allNames) {
Chris@0 279 composers[candidateName.toLower()].insert(c);
Chris@0 280 DEBUG << "added for alias or date " << candidateName << endl;
Chris@0 281 }
Chris@0 282 return;
Chris@0 283 }
Chris@0 284 }
Chris@0 285
Chris@0 286 if (matches.size() > 1) {
Chris@0 287 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
Chris@0 288 }
Chris@0 289
Chris@0 290 Composer *other = *matches.begin();
Chris@0 291
Chris@0 292 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
Chris@0 293
Chris@0 294 if (hasBetterName(c, other)) {
Chris@0 295 other->addAlias(other->name());
Chris@0 296 other->setName(c->name());
Chris@0 297 } else {
Chris@0 298 other->addAlias(c->name());
Chris@0 299 }
Chris@0 300 composers[c->name().toLower()].insert(other);
Chris@0 301 DEBUG << "linking from alias " << c->name() << endl;
Chris@0 302
Chris@0 303 foreach (QString alias, c->aliases()) {
Chris@0 304 if (alias != other->name() &&
Chris@0 305 !other->aliases().contains(alias)) {
Chris@0 306 other->addAlias(alias);
Chris@0 307 composers[alias.toLower()].insert(other);
Chris@0 308 DEBUG << "linking from alias " << alias << endl;
Chris@0 309 }
Chris@0 310 }
Chris@0 311
Chris@0 312 foreach (Document *d, c->pages()) {
Chris@0 313 bool found = false;
Chris@0 314 foreach (Document *dd, other->pages()) {
Chris@0 315 if (d->uri() == dd->uri()) {
Chris@0 316 found = true;
Chris@0 317 break;
Chris@0 318 }
Chris@0 319 }
Chris@0 320 if (!found) {
Chris@0 321 d->setTopic(other);
Chris@0 322 other->addPage(d);
Chris@0 323 }
Chris@0 324 }
Chris@0 325
Chris@0 326 //!!! actually the "approximate" bits of the following are bogus;
Chris@0 327 // a source reporting birth or death date as approx is probably
Chris@0 328 // more accurate than one reporting an exact date
Chris@0 329
Chris@0 330 if (c->birth()) {
Chris@0 331 if (!other->birth() || other->birth()->approximate()) {
Chris@0 332 other->setBirth(c->birth());
Chris@0 333 }
Chris@0 334 }
Chris@0 335
Chris@0 336 if (c->death()) {
Chris@0 337 if (!other->death() || other->death()->approximate()) {
Chris@0 338 other->setDeath(c->death());
Chris@0 339 }
Chris@0 340 }
Chris@0 341
Chris@0 342 if (c->gender() != "") other->setGender(c->gender());
Chris@0 343 if (c->nationality() != "") other->setNationality(c->nationality());
Chris@0 344 if (c->remarks() != "") other->setRemarks(c->remarks());
Chris@0 345 if (c->period() != "") other->setPeriod(c->period());
Chris@0 346
Chris@0 347 }
Chris@0 348
Chris@0 349 QString
Chris@0 350 asciify(QString field)
Chris@0 351 {
Chris@0 352 // accented characters etc -- add "ascii version" for dumb search purposes
Chris@0 353 QString ascii;
Chris@0 354 for (int i = 0; i < field.length(); ++i) {
Chris@0 355 QString dc = field[i].decomposition();
Chris@0 356 if (dc != "") ascii += dc[0];
Chris@0 357 else if (field[i] == QChar(0x00DF)) {
Chris@0 358 ascii += "ss";
Chris@0 359 } else {
Chris@0 360 ascii += field[i];
Chris@0 361 }
Chris@0 362 }
Chris@0 363 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
Chris@0 364 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
Chris@0 365 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
Chris@0 366 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
Chris@0 367 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
Chris@0 368 return ascii;
Chris@0 369 }
Chris@0 370
Chris@0 371 void
Chris@0 372 asciify(Composer *c)
Chris@0 373 {
Chris@0 374 QString n = c->name();
Chris@0 375 QString asc = asciify(n);
Chris@0 376 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 377 foreach (QString alias, c->aliases()) {
Chris@0 378 asc = asciify(alias);
Chris@0 379 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 380 }
Chris@0 381 }
Chris@0 382
Chris@0 383 void
Chris@0 384 asciify(Work *w)
Chris@0 385 {
Chris@0 386 QString n = w->name();
Chris@0 387 QString asc = asciify(n);
Chris@0 388 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 389 foreach (QString alias, w->aliases()) {
Chris@0 390 asc = asciify(alias);
Chris@0 391 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 392 }
Chris@0 393 }
Chris@0 394
Chris@0 395 void
Chris@0 396 assignUri(Store *s, Composer *c)
Chris@0 397 {
Chris@0 398 static QSet<QString> convSet;
Chris@0 399 QString conv = c->name();
Chris@0 400 if (!conv.contains(",")) {
Chris@0 401 QStringList sl = conv.split(" ");
Chris@0 402 if (!sl.empty()) {
Chris@0 403 sl.push_front(sl[sl.size()-1]);
Chris@0 404 sl.removeLast();
Chris@0 405 conv = sl.join(" ");
Chris@0 406 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
Chris@0 407 }
Chris@0 408 }
Chris@0 409 conv = asciify(conv);
Chris@0 410 conv.replace(" ", "_");
Chris@0 411 conv.replace("-", "_");
Chris@0 412 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@0 413 conv = conv.toLower();
Chris@0 414 QString initial = conv;
Chris@1 415 int i = 2;
Chris@0 416 while (convSet.contains(conv)) {
Chris@0 417 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 418 i++;
Chris@0 419 }
Chris@0 420 convSet.insert(conv);
Chris@1 421 c->setProperty("uri", s->expand(":composer/" + conv));
Chris@0 422 }
Chris@0 423
Chris@0 424 void
Chris@0 425 assignUri(Store *s, Work *w, Composer *c)
Chris@0 426 {
Chris@0 427 QString pfx = c->property("uri").toUrl().toString();
Chris@0 428 DEBUG << "pfx = " << pfx << endl;
Chris@1 429 if (!pfx.contains("composer/")) pfx = "";
Chris@0 430
Chris@0 431 static QSet<QString> convSet;
Chris@1 432
Chris@0 433 QString conv = w->catalogue();
Chris@0 434 if (conv == "") conv = w->opus();
Chris@0 435 conv = conv.replace(".", "");
Chris@0 436 bool hasOpus = (conv != "");
Chris@1 437 if (conv == "") conv = w->name().toLower();
Chris@0 438 if (w->number() != "") conv = conv + "_no" + w->number();
Chris@0 439 conv = asciify(conv);
Chris@0 440 conv.replace(" ", "_");
Chris@0 441 conv.replace("-", "_");
Chris@0 442 conv.replace(":", "_");
Chris@0 443 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@1 444
Chris@1 445 if (pfx != "") conv = pfx + "/work/" + conv;
Chris@1 446
Chris@0 447 // I think actually for works we want to merge duplicates rather than
Chris@0 448 // assign them separate URIs, _unless_ they lack a viable opus number
Chris@0 449 if (!hasOpus) {
Chris@0 450 QString initial = conv;
Chris@1 451 int i = 2;
Chris@0 452 while (convSet.contains(conv)) {
Chris@0 453 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 454 i++;
Chris@0 455 }
Chris@0 456 }
Chris@0 457 convSet.insert(conv);
Chris@1 458
Chris@1 459 w->setProperty("uri", conv);
Chris@0 460 }
Chris@0 461
Chris@0 462 void
Chris@0 463 addDbpediaResource(Store *store, QObject *o, QString s)
Chris@0 464 {
Chris@0 465 QUrl u = o->property("uri").toUrl();
Chris@0 466 if (u == QUrl()) return;
Chris@0 467 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
Chris@0 468 store->add(Triple(u,
Chris@0 469 "mo:wikipedia",
Chris@0 470 QUrl(s)));
Chris@0 471 s.replace("http://en.wikipedia.org/wiki/",
Chris@0 472 "http://dbpedia.org/resource/");
Chris@0 473 store->add(Triple(u,
Chris@0 474 "owl:sameAs",
Chris@0 475 QUrl(s)));
Chris@0 476 }
Chris@0 477 }
Chris@0 478
Chris@0 479 int main(int argc, char **argv)
Chris@0 480 {
Chris@0 481 qRegisterMetaType<HistoricalEvent *>
Chris@0 482 ("ClassicalData::HistoricalEvent*");
Chris@0 483 qRegisterMetaType<Birth *>
Chris@0 484 ("ClassicalData::Birth*");
Chris@0 485 qRegisterMetaType<Death *>
Chris@0 486 ("ClassicalData::Death*");
Chris@0 487 qRegisterMetaType<Composition *>
Chris@0 488 ("ClassicalData::Composition*");
Chris@0 489 qRegisterMetaType<Work *>
Chris@0 490 ("ClassicalData::Work*");
Chris@0 491 qRegisterMetaType<Movement *>
Chris@0 492 ("ClassicalData::Movement*");
Chris@0 493 qRegisterMetaType<Composer *>
Chris@0 494 ("ClassicalData::Composer*");
Chris@0 495 qRegisterMetaType<Document *>
Chris@0 496 ("ClassicalData::Document*");
Chris@0 497 qRegisterMetaType<Form *>
Chris@0 498 ("ClassicalData::Form*");
Chris@0 499 qRegisterMetaType<QSet<Work *> >
Chris@0 500 ("QSet<ClassicalData::Work*>");
Chris@0 501 qRegisterMetaType<QSet<Movement *> >
Chris@0 502 ("QSet<ClassicalData::Movement*>");
Chris@0 503 qRegisterMetaType<QSet<Document *> >
Chris@0 504 ("QSet<ClassicalData::Document*>");
Chris@0 505 qRegisterMetaType<QSet<Form *> >
Chris@0 506 ("QSet<ClassicalData::Form*>");
Chris@0 507 qRegisterMetaType<QSet<QString> >
Chris@0 508 ("QSet<QString>");
Chris@0 509
Chris@0 510 qRegisterMetaType<ClassicalComposersOrgImporter *>
Chris@0 511 ("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 512 qRegisterMetaType<ClassicalDotNetImporter *>
Chris@0 513 ("ClassicalData::ClassicalDotNetImporter*");
Chris@0 514 qRegisterMetaType<WikipediaComposersImporter *>
Chris@0 515 ("ClassicalData::WikipediaComposersImporter*");
Chris@0 516 qRegisterMetaType<WikipediaWorksImporter *>
Chris@0 517 ("ClassicalData::WikipediaWorksImporter*");
Chris@0 518 qRegisterMetaType<WikipediaWorksKImporter *>
Chris@0 519 ("ClassicalData::WikipediaWorksKImporter*");
Chris@0 520 qRegisterMetaType<WikipediaWorksListImporter *>
Chris@0 521 ("ClassicalData::WikipediaWorksListImporter*");
Chris@0 522 qRegisterMetaType<HobokenImporter *>
Chris@0 523 ("ClassicalData::HobokenImporter*");
Chris@0 524
Chris@0 525 ObjectBuilder::getInstance()->registerClass
Chris@0 526 <HistoricalEvent>("ClassicalData::HistoricalEvent*");
Chris@0 527 ObjectBuilder::getInstance()->registerClass
Chris@0 528 <Birth>("ClassicalData::Birth*");
Chris@0 529 ObjectBuilder::getInstance()->registerClass
Chris@0 530 <Death>("ClassicalData::Death*");
Chris@0 531 ObjectBuilder::getInstance()->registerClass
Chris@0 532 <Composition>("ClassicalData::Composition*");
Chris@0 533 ObjectBuilder::getInstance()->registerClass
Chris@0 534 <Work, QObject>("ClassicalData::Work*");
Chris@0 535 ObjectBuilder::getInstance()->registerClass
Chris@0 536 <Movement, QObject>("ClassicalData::Movement*");
Chris@0 537 ObjectBuilder::getInstance()->registerClass
Chris@0 538 <Composer, QObject>("ClassicalData::Composer*");
Chris@0 539 ObjectBuilder::getInstance()->registerClass
Chris@0 540 <Document, QObject>("ClassicalData::Document*");
Chris@0 541 ObjectBuilder::getInstance()->registerClass
Chris@0 542 <Form, QObject>("ClassicalData::Form*");
Chris@0 543
Chris@0 544 ObjectBuilder::getInstance()->registerClass
Chris@0 545 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 546 ObjectBuilder::getInstance()->registerClass
Chris@0 547 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
Chris@0 548 ObjectBuilder::getInstance()->registerClass
Chris@0 549 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
Chris@0 550 ObjectBuilder::getInstance()->registerClass
Chris@0 551 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
Chris@0 552 ObjectBuilder::getInstance()->registerClass
Chris@0 553 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
Chris@0 554 ObjectBuilder::getInstance()->registerClass
Chris@0 555 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
Chris@0 556 ObjectBuilder::getInstance()->registerClass
Chris@0 557 <HobokenImporter>("ClassicalData::HobokenImporter*");
Chris@0 558
Chris@0 559 ContainerBuilder::getInstance()->registerContainer
Chris@0 560 <QString, QSet<QString> >
Chris@0 561 ("QString", "QSet<QString>", ContainerBuilder::SetKind);
Chris@0 562
Chris@0 563 ContainerBuilder::getInstance()->registerContainer
Chris@0 564 <Work*, QSet<Work*> >
Chris@0 565 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>",
Chris@0 566 ContainerBuilder::SetKind);
Chris@0 567
Chris@0 568 ContainerBuilder::getInstance()->registerContainer
Chris@0 569 <Movement*, QSet<Movement*> >
Chris@0 570 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>",
Chris@0 571 ContainerBuilder::SetKind);
Chris@0 572
Chris@0 573 ContainerBuilder::getInstance()->registerContainer
Chris@0 574 <Document*, QSet<Document*> >
Chris@0 575 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>",
Chris@0 576 ContainerBuilder::SetKind);
Chris@0 577
Chris@0 578 ContainerBuilder::getInstance()->registerContainer
Chris@0 579 <Form*, QSet<Form*> >
Chris@0 580 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>",
Chris@0 581 ContainerBuilder::SetKind);
Chris@0 582
Chris@0 583 BasicStore *store = BasicStore::load("file:importers.ttl");
Chris@0 584 ObjectMapper mapper(store);
Chris@0 585 QObject *parentObject = mapper.loadAllObjects(new QObject());
Chris@0 586
Chris@0 587 BasicStore *outstore = new BasicStore();
Chris@1 588 outstore->setBaseUri("http://dbtune.org/classical/resource/");
Chris@0 589 ObjectMapper outmapper(outstore);
Chris@0 590
Chris@0 591 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
Chris@1 592
Chris@1 593 outmapper.setObjectTypePrefix("http://dbtune.org/classical/resource/");
Chris@1 594 outmapper.setPropertyPrefix("http://dbtune.org/classical/resource/vocab/");
Chris@1 595 outmapper.setRelationshipPrefix("http://dbtune.org/classical/resource/vocab/relationship/");
Chris@0 596
Chris@0 597 outstore->addPrefix("type", outmapper.getObjectTypePrefix());
Chris@1 598 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "type/");
Chris@0 599 outstore->addPrefix("property", outmapper.getPropertyPrefix());
Chris@0 600 outstore->addPrefix("rel", outmapper.getRelationshipPrefix());
Chris@1 601
Chris@0 602 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/");
Chris@0 603 outstore->addPrefix("mo", "http://purl.org/ontology/mo/");
Chris@0 604 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/");
Chris@0 605 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/");
Chris@0 606 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#");
Chris@0 607 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
Chris@1 608 outstore->addPrefix("db", "http://dbtune.org/musicbrainz/resource/");
Chris@1 609 outstore->addPrefix("dbv", "http://dbtune.org/musicbrainz/resource/vocab/");
Chris@0 610
Chris@1 611 outmapper.addTypeMapping("ClassicalData::Composer", "classical:Composer");
Chris@1 612 outmapper.addPropertyMapping("ClassicalData::Composer", "pages", "foaf:page");
Chris@1 613 outmapper.addPropertyMapping("ClassicalData::Composer", "name", "foaf:name");
Chris@1 614 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", "dbv:alias");
Chris@1 615 outmapper.addPropertyMapping("ClassicalData::Composer", "birth", "property:birth");
Chris@1 616 outmapper.addPropertyMapping("ClassicalData::Composer", "death", "property:death");
Chris@0 617
Chris@1 618 outmapper.addTypeMapping("ClassicalData::Birth", "bio:Birth");
Chris@1 619 outmapper.addTypeMapping("ClassicalData::Death", "bio:Death");
Chris@1 620 outmapper.addPropertyMapping("ClassicalData::Birth", "year", "bio:date");
Chris@1 621 outmapper.addPropertyMapping("ClassicalData::Death", "year", "bio:date");
Chris@1 622 outmapper.addPropertyMapping("ClassicalData::Birth", "place", "bio:place");
Chris@1 623 outmapper.addPropertyMapping("ClassicalData::Death", "place", "bio:place");
Chris@0 624
Chris@1 625 outmapper.addTypeMapping("ClassicalData::Document", "foaf:Document");
Chris@1 626 outmapper.addPropertyMapping("ClassicalData::Document", "topic", "foaf:primaryTopic");
Chris@0 627
Chris@1 628 outmapper.addTypeMapping("ClassicalData::Work", "mo:MusicalWork");
Chris@1 629
Chris@1 630 outmapper.addPropertyMapping("ClassicalData::Work", "composition", "mo:composed_in");
Chris@1 631 outmapper.addPropertyMapping("ClassicalData::Work", "opus", "mo:opus");
Chris@1 632 outmapper.addPropertyMapping("ClassicalData::Work", "catalogue", "mo:catalogue");
Chris@1 633 outmapper.addPropertyMapping("ClassicalData::Work", "number", "mo:number");
Chris@1 634 outmapper.addPropertyMapping("ClassicalData::Work", "partOf", "dc:isPartOf");
Chris@1 635 outmapper.addPropertyMapping("ClassicalData::Work", "parts", "dc:hasPart");
Chris@1 636 outmapper.addPropertyMapping("ClassicalData::Work", "pages", "foaf:page");
Chris@1 637 outmapper.addPropertyMapping("ClassicalData::Work", "forms", "property:form");
Chris@1 638 outmapper.addPropertyMapping("ClassicalData::Work", "key", "mo:key");
Chris@1 639 outmapper.addPropertyMapping("ClassicalData::Work", "aliases", "dbv:alias");
Chris@1 640 outmapper.addPropertyMapping("ClassicalData::Work", "name", "dc:title");
Chris@1 641
Chris@1 642 outmapper.addTypeMapping("ClassicalData::Composition", "mo:Composition");
Chris@1 643 outmapper.addPropertyMapping("ClassicalData::Composition", "composer", "mo:composer");
Chris@1 644 outmapper.addPropertyMapping("ClassicalData::Composition", "works", "mo:produced_work");
Chris@1 645
Chris@1 646 outstore->add(Triple("classical:Composer", "a", outstore->expand("owl:Class")));
Chris@1 647 outstore->add(Triple("classical:Composer", "rdfs:subClassOf", outstore->expand("mo:MusicArtist")));
Chris@1 648
Chris@1 649 outstore->add(Triple("property:birth", "a", outstore->expand("owl:ObjectProperty")));
Chris@1 650 outstore->add(Triple("property:birth", "rdfs:subPropertyOf", outstore->expand("bio:event")));
Chris@1 651
Chris@1 652 outstore->add(Triple("property:death", "a", outstore->expand("owl:ObjectProperty")));
Chris@1 653 outstore->add(Triple("property:death", "rdfs:subPropertyOf", outstore->expand("bio:event")));
Chris@0 654
Chris@0 655 QList<Importer *> importers = parentObject->findChildren<Importer *>();
Chris@0 656 std::cerr << "have " << importers.size() << " importers" << std::endl;
Chris@0 657
Chris@0 658 ComposerMap composers;
Chris@0 659
Chris@0 660 QList<Composer *> dated;
Chris@0 661 QList<Composer *> undated;
Chris@0 662
Chris@0 663 QList<Work *> works;
Chris@0 664 QList<Composition *> compositions;
Chris@0 665 QList<QObject *> other;
Chris@0 666
Chris@0 667 foreach (Importer *importer, importers) {
Chris@0 668 QObjectList objects = importer->getImportedObjects();
Chris@0 669 foreach (QObject *o, objects) {
Chris@0 670 Composer *c;
Chris@0 671 if ((c = qobject_cast<Composer *>(o))) {
Chris@0 672 addMiscExpansions(c);
Chris@0 673 asciify(c);
Chris@0 674 if (c->birth() || c->death()) dated.push_back(c);
Chris@0 675 else undated.push_back(c);
Chris@0 676 continue;
Chris@0 677 }
Chris@0 678 Work *w;
Chris@0 679 if ((w = qobject_cast<Work *>(o))) {
Chris@0 680 asciify(w);
Chris@0 681 works.push_back(w);
Chris@0 682 continue;
Chris@0 683 }
Chris@0 684 Composition *cn;
Chris@0 685 if ((cn = qobject_cast<Composition *>(o))) {
Chris@0 686 compositions.push_back(cn);
Chris@0 687 continue;
Chris@0 688 }
Chris@0 689 }
Chris@0 690 }
Chris@0 691
Chris@0 692 // get all the dated composers merged before attempting to match
Chris@0 693 // the undated ones
Chris@0 694 foreach (Composer *c, dated) {
Chris@0 695 mergeComposer(c, composers);
Chris@0 696 }
Chris@0 697 foreach (Composer *c, undated) {
Chris@0 698 mergeComposer(c, composers);
Chris@0 699 }
Chris@0 700
Chris@0 701 QObjectList toStore;
Chris@0 702
Chris@0 703 QSet<Composer *> cset;
Chris@0 704 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
Chris@0 705 foreach (Composer *c, i.value()) {
Chris@0 706 if (!cset.contains(c)) {
Chris@0 707 assignUri(outstore, c);
Chris@0 708 toStore.push_back(c);
Chris@0 709 cset.insert(c);
Chris@0 710 }
Chris@0 711 foreach (Document *d, c->pages()) {
Chris@0 712 QString s = d->uri().toString();
Chris@0 713 addDbpediaResource(outstore, c, s);
Chris@0 714 }
Chris@0 715 }
Chris@0 716 }
Chris@0 717
Chris@0 718 QSet<QString> storedUris;
Chris@0 719
Chris@0 720 foreach (Work *w, works) {
Chris@0 721 Composition *cn = w->composition();
Chris@0 722 if (!cn) continue;
Chris@0 723 if (!cn->composer()) {
Chris@0 724 QString cname = cn->composerName();
Chris@0 725 if (cname != "") {
Chris@0 726 if (!composers.contains(cname.toLower())) {
Chris@0 727 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 728 } else {
Chris@0 729 QSet<Composer *> cs = composers[cname.toLower()];
Chris@0 730 if (cs.empty()) {
Chris@0 731 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 732 } else if (cs.size() > 1) {
Chris@0 733 DEBUG << "Failed to assign Composition to composer: "
Chris@0 734 << cs.size() << " composers match name " << cname << endl;
Chris@0 735 } else {
Chris@0 736 cn->setComposer(*cs.begin());
Chris@0 737 }
Chris@0 738 }
Chris@0 739 } else {
Chris@0 740 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
Chris@0 741 }
Chris@0 742 }
Chris@0 743
Chris@0 744 if (cn->composer()) {
Chris@0 745 assignUri(outstore, w, cn->composer());
Chris@0 746 }
Chris@0 747
Chris@0 748 foreach (Document *d, w->pages()) {
Chris@0 749 QString s = d->uri().toString();
Chris@0 750 addDbpediaResource(outstore, w, s);
Chris@1 751 if (!storedUris.contains(s)) {
Chris@1 752 toStore.push_back(d);
Chris@1 753 storedUris.insert(s);
Chris@1 754 }
Chris@0 755 }
Chris@0 756
Chris@0 757 QString u = w->property("uri").toUrl().toString();
Chris@0 758 if (u == "" || !storedUris.contains(u)) {
Chris@0 759 toStore.push_back(w);
Chris@0 760 if (u != "") storedUris.insert(u);
Chris@0 761 }
Chris@0 762 }
Chris@0 763
Chris@0 764 try {
Chris@0 765 outmapper.storeAllObjects(toStore);
Chris@0 766
Chris@0 767 } catch (RDFException e) {
Chris@0 768 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
Chris@0 769 }
Chris@0 770
Chris@0 771 DEBUG << "Stored, now saving" << endl;
Chris@0 772
Chris@0 773 outstore->save("test-out.ttl");
Chris@0 774
Chris@0 775 DEBUG << "Saved" << endl;
Chris@0 776
Chris@0 777
Chris@0 778 QMultiMap<QString, Composer *> cmap;
Chris@0 779 foreach (Composer *c, cset) {
Chris@0 780 QString n = c->getSortName(true);
Chris@0 781 cmap.insert(n, c);
Chris@0 782 }
Chris@0 783
Chris@0 784 std::cout << "Composers: " << cmap.size() << std::endl;
Chris@0 785
Chris@0 786 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 787 i != cmap.end(); ++i) {
Chris@0 788
Chris@0 789 QString n = i.key();
Chris@0 790 Composer *c = i.value();
Chris@0 791
Chris@0 792 std::cout << n.toStdString();
Chris@0 793
Chris@0 794 QString d = c->getDisplayDates();
Chris@0 795 if (d != "") std::cout << " (" << d.toStdString() << ")";
Chris@0 796 std::cout << std::endl;
Chris@0 797 }
Chris@0 798
Chris@0 799 std::cout << std::endl;
Chris@0 800
Chris@0 801 std::cout << "Works by composer:" << std::endl;
Chris@0 802
Chris@0 803 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 804 i != cmap.end(); ++i) {
Chris@0 805
Chris@0 806 QString n = i.key();
Chris@0 807 Composer *c = i.value();
Chris@0 808
Chris@0 809 std::set<Work *, Work::Ordering> wmap;
Chris@0 810 foreach (Work *w, works) {
Chris@0 811 Composition *cn = w->composition();
Chris@0 812 if (!cn) continue;
Chris@0 813 if (cn->composer() != c) continue;
Chris@0 814 if (w->partOf()) continue;
Chris@0 815 wmap.insert(w);
Chris@0 816 }
Chris@0 817
Chris@0 818 if (wmap.empty()) continue;
Chris@0 819
Chris@0 820 std::cout << n.toStdString() << std::endl;
Chris@0 821
Chris@0 822 foreach (Work *w, wmap) {
Chris@0 823 std::cout << " * ";
Chris@0 824 std::cout << w->name().toStdString();
Chris@0 825 if (w->catalogue() != "") {
Chris@0 826 std::cout << " [" << w->catalogue().toStdString() << "]";
Chris@0 827 }
Chris@0 828 if (w->opus() != "") {
Chris@0 829 std::cout << " [op. " << w->opus().toStdString() << "]";
Chris@0 830 }
Chris@0 831 std::cout << std::endl;
Chris@0 832 std::set<Work *, Work::Ordering> orderedParts;
Chris@0 833 foreach (Work *ww, w->parts()) {
Chris@0 834 orderedParts.insert(ww);
Chris@0 835 }
Chris@0 836 foreach (Work *ww, orderedParts) {
Chris@0 837 std::cout << " ";
Chris@0 838 if (ww->number() != "") {
Chris@0 839 std::cout << ww->number().toStdString() << ". ";
Chris@0 840 }
Chris@0 841 std::cout << ww->name().toStdString();
Chris@0 842 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
Chris@0 843 std::cout << " [" << ww->catalogue().toStdString() << "]";
Chris@0 844 }
Chris@0 845 if (ww->opus() != "" && ww->opus() != w->opus()) {
Chris@0 846 std::cout << " [op. " << ww->opus().toStdString() << "]";
Chris@0 847 }
Chris@0 848 std::cout << std::endl;
Chris@0 849 }
Chris@0 850 }
Chris@0 851
Chris@0 852 std::cout << std::endl;
Chris@0 853 }
Chris@0 854
Chris@0 855 delete outstore;
Chris@0 856
Chris@0 857 DEBUG << "Done" << endl;
Chris@0 858
Chris@0 859
Chris@0 860 }
Chris@0 861
Chris@0 862