annotate import/Test.cpp @ 0:e8f4c2b55fd8 classical-rdf

* reorganise
author Chris Cannam
date Tue, 01 Dec 2009 17:50:41 +0000
parents
children
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 #include "Objects.h"
Chris@0 4
Chris@0 5 #include <dataquay/BasicStore.h>
Chris@0 6 #include <dataquay/RDFException.h>
Chris@0 7 #include <dataquay/objectmapper/ObjectMapper.h>
Chris@0 8 #include <dataquay/objectmapper/ObjectBuilder.h>
Chris@0 9 #include <dataquay/objectmapper/ContainerBuilder.h>
Chris@0 10
Chris@0 11 #include "ImportClassicalComposersOrg.h"
Chris@0 12 #include "ImportClassicalDotNet.h"
Chris@0 13 #include "ImportWikipediaComposers.h"
Chris@0 14 #include "ImportWikipediaWorks.h"
Chris@0 15 #include "ImportWikipediaWorksK.h"
Chris@0 16 #include "ImportWikipediaWorksList.h"
Chris@0 17 #include "ImportHoboken.h"
Chris@0 18
Chris@0 19 #include <dataquay/Debug.h>
Chris@0 20
Chris@0 21 using namespace ClassicalData;
Chris@0 22 using namespace Dataquay;
Chris@0 23
Chris@0 24 #include <iostream>
Chris@0 25 #include <set>
Chris@0 26
Chris@0 27 typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers
Chris@0 28
Chris@0 29 bool datesMatch(Composer *a, Composer *b)
Chris@0 30 {
Chris@0 31 if (a->birth() && b->birth()) {
Chris@0 32 if (abs(a->birth()->year() - b->birth()->year()) > 1) {
Chris@0 33 if ((!a->birth()->approximate() && !b->birth()->approximate()) ||
Chris@0 34 (abs(a->birth()->year() - b->birth()->year()) > 10)) {
Chris@0 35 return false;
Chris@0 36 }
Chris@0 37 }
Chris@0 38 }
Chris@0 39 if (a->death() && b->death()) {
Chris@0 40 if (abs(a->death()->year() - b->death()->year()) > 1) {
Chris@0 41 if ((!a->death()->approximate() && !b->death()->approximate()) ||
Chris@0 42 (abs(a->death()->year() - b->death()->year()) > 10)) {
Chris@0 43 return false;
Chris@0 44 }
Chris@0 45 }
Chris@0 46 }
Chris@0 47 return true;
Chris@0 48 }
Chris@0 49
Chris@0 50 void
Chris@0 51 addMiscExpansions(Composer *c)
Chris@0 52 {
Chris@0 53 QString n = c->name();
Chris@0 54
Chris@0 55 DEBUG << "addMiscExpansions: n = " << n << endl;
Chris@0 56
Chris@0 57 // lovely hard-coded special cases go here! some of these are
Chris@0 58 // needed for works->composer assignments
Chris@0 59 if (n == "Balakirev, Milii") {
Chris@0 60 c->addAlias("Mily Balakirev");
Chris@0 61 }
Chris@0 62 if (n.startsWith("Cui, C")) {
Chris@0 63 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
Chris@0 64 }
Chris@0 65 if (n == "Handel, George Frideric") {
Chris@0 66 c->addAlias("Handel, Georg Friedrich");
Chris@0 67 c->addAlias("Handel");
Chris@0 68 }
Chris@0 69 if (n == "Mayr, Simon") {
Chris@0 70 c->addAlias("Mayr");
Chris@0 71 }
Chris@0 72
Chris@0 73 n.replace(", Sr.", " Sr.");
Chris@0 74 n.replace(", Jr.", " Jr.");
Chris@0 75
Chris@0 76 int comma = n.indexOf(", ");
Chris@0 77 if (comma > 0 && comma + 2 < n.length()) {
Chris@0 78
Chris@0 79 QString left = n.left(comma);
Chris@0 80 QString right = n.right(n.length() - comma - 2);
Chris@0 81
Chris@0 82 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
Chris@0 83 if (jrsr.indexIn(right) >= 0) {
Chris@0 84 left = left + jrsr.cap(1);
Chris@0 85 right = right.left(right.length()-jrsr.matchedLength());
Chris@0 86 }
Chris@0 87 n = right + " " + left;
Chris@0 88 }
Chris@0 89
Chris@0 90 if (n != c->name()) c->addAlias(n);
Chris@0 91
Chris@0 92 if (n.contains("Sergey")) {
Chris@0 93 QString nn(n);
Chris@0 94 nn.replace("Sergey", "Sergei");
Chris@0 95 c->addAlias(nn);
Chris@0 96 }
Chris@0 97
Chris@0 98 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
Chris@0 99 if (sr.indexIn(n) >= 0) {
Chris@0 100 QString nr = n;
Chris@0 101 nr.replace(sr.pos(0), sr.matchedLength(), " I");
Chris@0 102 nr.replace(" ", " ");
Chris@0 103 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 104 c->addAlias(nr);
Chris@0 105 }
Chris@0 106 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
Chris@0 107 if (jr.indexIn(n) >= 0) {
Chris@0 108 QString nr = n;
Chris@0 109 nr.replace(jr.pos(0), jr.matchedLength(), " II");
Chris@0 110 nr.replace(" ", " ");
Chris@0 111 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
Chris@0 112 c->addAlias(nr);
Chris@0 113 }
Chris@0 114 QString nr = n;
Chris@0 115 nr.replace("(I)", "I");
Chris@0 116 nr.replace("(II)", "II");
Chris@0 117 nr.replace("(III)", "III");
Chris@0 118 c->addAlias(nr);
Chris@0 119 }
Chris@0 120
Chris@0 121 bool namesFuzzyMatch(QString an, Composer *b)
Chris@0 122 {
Chris@0 123 // ew!
Chris@0 124
Chris@0 125 QString bn = b->name();
Chris@0 126 if (bn == an) return true;
Chris@0 127 if (b->aliases().contains(an)) return true;
Chris@0 128 int aSurnameIndex = 0, bSurnameIndex = 0;
Chris@0 129 if (an.contains(",")) {
Chris@0 130 an.replace(",", "");
Chris@0 131 } else {
Chris@0 132 aSurnameIndex = -1;
Chris@0 133 }
Chris@0 134 if (bn.contains(",")) {
Chris@0 135 bn.replace(",", "");
Chris@0 136 } else {
Chris@0 137 bSurnameIndex = -1;
Chris@0 138 }
Chris@0 139 QStringList nl = an.split(QRegExp("[ -]"));
Chris@0 140 QStringList bnl = bn.split(QRegExp("[ -]"));
Chris@0 141 int matchCount = 0;
Chris@0 142 QString surnameMatch = "";
Chris@0 143 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
Chris@0 144 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
Chris@0 145 if (nl[aSurnameIndex][0].isUpper() &&
Chris@0 146 nl[aSurnameIndex] != "Della" &&
Chris@0 147 nl[aSurnameIndex] == bnl[bSurnameIndex]) {
Chris@0 148 surnameMatch = nl[aSurnameIndex];
Chris@0 149 }
Chris@0 150 foreach (QString elt, nl) {
Chris@0 151 if (!elt[0].isUpper() || elt == "Della") continue;
Chris@0 152 if (bnl.contains(elt)) {
Chris@0 153 ++matchCount;
Chris@0 154 continue;
Chris@0 155 }
Chris@0 156 }
Chris@0 157 if (matchCount > 1 && surnameMatch != "") {
Chris@0 158 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
Chris@0 159 return true;
Chris@0 160 }
Chris@0 161 return false;
Chris@0 162 }
Chris@0 163
Chris@0 164 bool
Chris@0 165 hasBetterName(Composer *c, Composer *other)
Chris@0 166 {
Chris@0 167 if (c->name() == other->name()) return false;
Chris@0 168
Chris@0 169 // Try to guess which of c and other is more likely to have a good
Chris@0 170 // "canonical form" of the composer's name
Chris@0 171
Chris@0 172 if (c->name().startsWith("van ")) {
Chris@0 173 return false; // wrong choice of sort for e.g. LvB; should be
Chris@0 174 // Beethoven, Ludwig van, not van Beethoven, Ludwig
Chris@0 175 }
Chris@0 176 if (other->name().startsWith("van ")) {
Chris@0 177 return true;
Chris@0 178 }
Chris@0 179
Chris@0 180 if (c->aliases().size() != other->aliases().size()) {
Chris@0 181 // a rather weak heuristic
Chris@0 182 return c->aliases().size() > other->aliases().size();
Chris@0 183 }
Chris@0 184
Chris@0 185 if (c->name().contains(',') && !other->name().contains(',')) {
Chris@0 186 // another rather weak heuristic
Chris@0 187 return true;
Chris@0 188 }
Chris@0 189
Chris@0 190 return false;
Chris@0 191 }
Chris@0 192
Chris@0 193 void mergeComposer(Composer *c, ComposerMap &composers)
Chris@0 194 {
Chris@0 195 QString name = c->name();
Chris@0 196
Chris@0 197 QSet<QString> allNames = c->aliases();
Chris@0 198 allNames.insert(name);
Chris@0 199
Chris@0 200 QString dates;
Chris@0 201 if (c->birth()) {
Chris@0 202 if (c->death()) {
Chris@0 203 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
Chris@0 204 } else {
Chris@0 205 dates = QString("%1-").arg(c->birth()->year());
Chris@0 206 }
Chris@0 207 }
Chris@0 208 if (dates != "") {
Chris@0 209 allNames.insert(dates);
Chris@0 210 }
Chris@0 211
Chris@0 212 QSet<Composer *> matches;
Chris@0 213
Chris@0 214 foreach (QString candidateName, allNames) {
Chris@0 215 QString key = candidateName.toLower();
Chris@0 216 if (composers.contains(key)) {
Chris@0 217 foreach (Composer *candidate, composers[key]) {
Chris@0 218 if (candidateName == dates) {
Chris@0 219 if (!namesFuzzyMatch(c->name(), candidate) &&
Chris@0 220 !namesFuzzyMatch(candidate->name(), c)) {
Chris@0 221 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
Chris@0 222 continue;
Chris@0 223 } else {
Chris@0 224 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
Chris@0 225 }
Chris@0 226 } else {
Chris@0 227 if (!datesMatch(c, candidate)) {
Chris@0 228 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
Chris@0 229 continue;
Chris@0 230 }
Chris@0 231 }
Chris@0 232 matches.insert(candidate);
Chris@0 233 }
Chris@0 234 }
Chris@0 235 }
Chris@0 236
Chris@0 237 if (matches.empty()) {
Chris@0 238 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
Chris@0 239
Chris@0 240 if (!c->birth() && !c->death()) {
Chris@0 241 // laboriously look for fuzzy match across _all_ composers
Chris@0 242 for (ComposerMap::iterator i = composers.begin();
Chris@0 243 i != composers.end(); ++i) {
Chris@0 244 foreach (Composer *candidate, *i) {
Chris@0 245 if (namesFuzzyMatch(c->name(), candidate)) {
Chris@0 246 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
Chris@0 247 matches.insert(candidate);
Chris@0 248 break;
Chris@0 249 }
Chris@0 250 }
Chris@0 251 if (!matches.empty()) break;
Chris@0 252 }
Chris@0 253 }
Chris@0 254
Chris@0 255 if (matches.empty()) {
Chris@0 256 foreach (QString candidateName, allNames) {
Chris@0 257 composers[candidateName.toLower()].insert(c);
Chris@0 258 DEBUG << "added for alias or date " << candidateName << endl;
Chris@0 259 }
Chris@0 260 return;
Chris@0 261 }
Chris@0 262 }
Chris@0 263
Chris@0 264 if (matches.size() > 1) {
Chris@0 265 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
Chris@0 266 }
Chris@0 267
Chris@0 268 Composer *other = *matches.begin();
Chris@0 269
Chris@0 270 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
Chris@0 271
Chris@0 272 if (hasBetterName(c, other)) {
Chris@0 273 other->addAlias(other->name());
Chris@0 274 other->setName(c->name());
Chris@0 275 } else {
Chris@0 276 other->addAlias(c->name());
Chris@0 277 }
Chris@0 278 composers[c->name().toLower()].insert(other);
Chris@0 279 DEBUG << "linking from alias " << c->name() << endl;
Chris@0 280
Chris@0 281 foreach (QString alias, c->aliases()) {
Chris@0 282 if (alias != other->name() &&
Chris@0 283 !other->aliases().contains(alias)) {
Chris@0 284 other->addAlias(alias);
Chris@0 285 composers[alias.toLower()].insert(other);
Chris@0 286 DEBUG << "linking from alias " << alias << endl;
Chris@0 287 }
Chris@0 288 }
Chris@0 289
Chris@0 290 foreach (Document *d, c->pages()) {
Chris@0 291 bool found = false;
Chris@0 292 foreach (Document *dd, other->pages()) {
Chris@0 293 if (d->uri() == dd->uri()) {
Chris@0 294 found = true;
Chris@0 295 break;
Chris@0 296 }
Chris@0 297 }
Chris@0 298 if (!found) {
Chris@0 299 d->setTopic(other);
Chris@0 300 other->addPage(d);
Chris@0 301 }
Chris@0 302 }
Chris@0 303
Chris@0 304 //!!! actually the "approximate" bits of the following are bogus;
Chris@0 305 // a source reporting birth or death date as approx is probably
Chris@0 306 // more accurate than one reporting an exact date
Chris@0 307
Chris@0 308 if (c->birth()) {
Chris@0 309 if (!other->birth() || other->birth()->approximate()) {
Chris@0 310 other->setBirth(c->birth());
Chris@0 311 }
Chris@0 312 }
Chris@0 313
Chris@0 314 if (c->death()) {
Chris@0 315 if (!other->death() || other->death()->approximate()) {
Chris@0 316 other->setDeath(c->death());
Chris@0 317 }
Chris@0 318 }
Chris@0 319
Chris@0 320 if (c->gender() != "") other->setGender(c->gender());
Chris@0 321 if (c->nationality() != "") other->setNationality(c->nationality());
Chris@0 322 if (c->remarks() != "") other->setRemarks(c->remarks());
Chris@0 323 if (c->period() != "") other->setPeriod(c->period());
Chris@0 324
Chris@0 325 }
Chris@0 326
Chris@0 327 QString
Chris@0 328 asciify(QString field)
Chris@0 329 {
Chris@0 330 // accented characters etc -- add "ascii version" for dumb search purposes
Chris@0 331 QString ascii;
Chris@0 332 for (int i = 0; i < field.length(); ++i) {
Chris@0 333 QString dc = field[i].decomposition();
Chris@0 334 if (dc != "") ascii += dc[0];
Chris@0 335 else if (field[i] == QChar(0x00DF)) {
Chris@0 336 ascii += "ss";
Chris@0 337 } else {
Chris@0 338 ascii += field[i];
Chris@0 339 }
Chris@0 340 }
Chris@0 341 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
Chris@0 342 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
Chris@0 343 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
Chris@0 344 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
Chris@0 345 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
Chris@0 346 return ascii;
Chris@0 347 }
Chris@0 348
Chris@0 349 void
Chris@0 350 asciify(Composer *c)
Chris@0 351 {
Chris@0 352 QString n = c->name();
Chris@0 353 QString asc = asciify(n);
Chris@0 354 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 355 foreach (QString alias, c->aliases()) {
Chris@0 356 asc = asciify(alias);
Chris@0 357 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
Chris@0 358 }
Chris@0 359 }
Chris@0 360
Chris@0 361 void
Chris@0 362 asciify(Work *w)
Chris@0 363 {
Chris@0 364 QString n = w->name();
Chris@0 365 QString asc = asciify(n);
Chris@0 366 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 367 foreach (QString alias, w->aliases()) {
Chris@0 368 asc = asciify(alias);
Chris@0 369 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
Chris@0 370 }
Chris@0 371 }
Chris@0 372
Chris@0 373 void
Chris@0 374 assignUri(Store *s, Composer *c)
Chris@0 375 {
Chris@0 376 static QSet<QString> convSet;
Chris@0 377 QString conv = c->name();
Chris@0 378 if (!conv.contains(",")) {
Chris@0 379 QStringList sl = conv.split(" ");
Chris@0 380 if (!sl.empty()) {
Chris@0 381 sl.push_front(sl[sl.size()-1]);
Chris@0 382 sl.removeLast();
Chris@0 383 conv = sl.join(" ");
Chris@0 384 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
Chris@0 385 }
Chris@0 386 }
Chris@0 387 conv = asciify(conv);
Chris@0 388 conv.replace(" ", "_");
Chris@0 389 conv.replace("-", "_");
Chris@0 390 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@0 391 conv = conv.toLower();
Chris@0 392 QString initial = conv;
Chris@0 393 int i = 1;
Chris@0 394 while (convSet.contains(conv)) {
Chris@0 395 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 396 i++;
Chris@0 397 }
Chris@0 398 convSet.insert(conv);
Chris@0 399 c->setProperty("uri", s->expand(":composer_" + conv));
Chris@0 400 }
Chris@0 401
Chris@0 402 void
Chris@0 403 assignUri(Store *s, Work *w, Composer *c)
Chris@0 404 {
Chris@0 405 QString pfx = c->property("uri").toUrl().toString();
Chris@0 406 DEBUG << "pfx = " << pfx << endl;
Chris@0 407 if (!pfx.contains("composer_")) pfx = "";
Chris@0 408 else pfx.replace(QRegExp("^.*composer_"), "");
Chris@0 409
Chris@0 410 static QSet<QString> convSet;
Chris@0 411 QString conv = w->catalogue();
Chris@0 412 if (conv == "") conv = w->opus();
Chris@0 413 conv = conv.replace(".", "");
Chris@0 414 bool hasOpus = (conv != "");
Chris@0 415 if (conv == "") conv = w->name();
Chris@0 416 if (w->number() != "") conv = conv + "_no" + w->number();
Chris@0 417 if (pfx != "") conv = pfx + "_" + conv;
Chris@0 418 conv = asciify(conv);
Chris@0 419 conv.replace(" ", "_");
Chris@0 420 conv.replace("-", "_");
Chris@0 421 conv.replace(":", "_");
Chris@0 422 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
Chris@0 423 conv = conv.toLower();
Chris@0 424 // I think actually for works we want to merge duplicates rather than
Chris@0 425 // assign them separate URIs, _unless_ they lack a viable opus number
Chris@0 426 if (!hasOpus) {
Chris@0 427 QString initial = conv;
Chris@0 428 int i = 1;
Chris@0 429 while (convSet.contains(conv)) {
Chris@0 430 conv = QString("%1__%2").arg(initial).arg(i);
Chris@0 431 i++;
Chris@0 432 }
Chris@0 433 }
Chris@0 434 convSet.insert(conv);
Chris@0 435 w->setProperty("uri", s->expand(":work_" + conv));
Chris@0 436 }
Chris@0 437
Chris@0 438 void
Chris@0 439 addDbpediaResource(Store *store, QObject *o, QString s)
Chris@0 440 {
Chris@0 441 QUrl u = o->property("uri").toUrl();
Chris@0 442 if (u == QUrl()) return;
Chris@0 443 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
Chris@0 444 store->add(Triple(u,
Chris@0 445 "mo:wikipedia",
Chris@0 446 QUrl(s)));
Chris@0 447 s.replace("http://en.wikipedia.org/wiki/",
Chris@0 448 "http://dbpedia.org/resource/");
Chris@0 449 store->add(Triple(u,
Chris@0 450 "owl:sameAs",
Chris@0 451 QUrl(s)));
Chris@0 452 }
Chris@0 453 }
Chris@0 454
Chris@0 455 int main(int argc, char **argv)
Chris@0 456 {
Chris@0 457 qRegisterMetaType<HistoricalEvent *>
Chris@0 458 ("ClassicalData::HistoricalEvent*");
Chris@0 459 qRegisterMetaType<Birth *>
Chris@0 460 ("ClassicalData::Birth*");
Chris@0 461 qRegisterMetaType<Death *>
Chris@0 462 ("ClassicalData::Death*");
Chris@0 463 qRegisterMetaType<Composition *>
Chris@0 464 ("ClassicalData::Composition*");
Chris@0 465 qRegisterMetaType<Work *>
Chris@0 466 ("ClassicalData::Work*");
Chris@0 467 qRegisterMetaType<Movement *>
Chris@0 468 ("ClassicalData::Movement*");
Chris@0 469 qRegisterMetaType<Composer *>
Chris@0 470 ("ClassicalData::Composer*");
Chris@0 471 qRegisterMetaType<Document *>
Chris@0 472 ("ClassicalData::Document*");
Chris@0 473 qRegisterMetaType<Form *>
Chris@0 474 ("ClassicalData::Form*");
Chris@0 475 qRegisterMetaType<QSet<Work *> >
Chris@0 476 ("QSet<ClassicalData::Work*>");
Chris@0 477 qRegisterMetaType<QSet<Movement *> >
Chris@0 478 ("QSet<ClassicalData::Movement*>");
Chris@0 479 qRegisterMetaType<QSet<Document *> >
Chris@0 480 ("QSet<ClassicalData::Document*>");
Chris@0 481 qRegisterMetaType<QSet<Form *> >
Chris@0 482 ("QSet<ClassicalData::Form*>");
Chris@0 483 qRegisterMetaType<QSet<QString> >
Chris@0 484 ("QSet<QString>");
Chris@0 485
Chris@0 486 qRegisterMetaType<ClassicalComposersOrgImporter *>
Chris@0 487 ("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 488 qRegisterMetaType<ClassicalDotNetImporter *>
Chris@0 489 ("ClassicalData::ClassicalDotNetImporter*");
Chris@0 490 qRegisterMetaType<WikipediaComposersImporter *>
Chris@0 491 ("ClassicalData::WikipediaComposersImporter*");
Chris@0 492 qRegisterMetaType<WikipediaWorksImporter *>
Chris@0 493 ("ClassicalData::WikipediaWorksImporter*");
Chris@0 494 qRegisterMetaType<WikipediaWorksKImporter *>
Chris@0 495 ("ClassicalData::WikipediaWorksKImporter*");
Chris@0 496 qRegisterMetaType<WikipediaWorksListImporter *>
Chris@0 497 ("ClassicalData::WikipediaWorksListImporter*");
Chris@0 498 qRegisterMetaType<HobokenImporter *>
Chris@0 499 ("ClassicalData::HobokenImporter*");
Chris@0 500
Chris@0 501 ObjectBuilder::getInstance()->registerClass
Chris@0 502 <HistoricalEvent>("ClassicalData::HistoricalEvent*");
Chris@0 503 ObjectBuilder::getInstance()->registerClass
Chris@0 504 <Birth>("ClassicalData::Birth*");
Chris@0 505 ObjectBuilder::getInstance()->registerClass
Chris@0 506 <Death>("ClassicalData::Death*");
Chris@0 507 ObjectBuilder::getInstance()->registerClass
Chris@0 508 <Composition>("ClassicalData::Composition*");
Chris@0 509 ObjectBuilder::getInstance()->registerClass
Chris@0 510 <Work, QObject>("ClassicalData::Work*");
Chris@0 511 ObjectBuilder::getInstance()->registerClass
Chris@0 512 <Movement, QObject>("ClassicalData::Movement*");
Chris@0 513 ObjectBuilder::getInstance()->registerClass
Chris@0 514 <Composer, QObject>("ClassicalData::Composer*");
Chris@0 515 ObjectBuilder::getInstance()->registerClass
Chris@0 516 <Document, QObject>("ClassicalData::Document*");
Chris@0 517 ObjectBuilder::getInstance()->registerClass
Chris@0 518 <Form, QObject>("ClassicalData::Form*");
Chris@0 519
Chris@0 520 ObjectBuilder::getInstance()->registerClass
Chris@0 521 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
Chris@0 522 ObjectBuilder::getInstance()->registerClass
Chris@0 523 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
Chris@0 524 ObjectBuilder::getInstance()->registerClass
Chris@0 525 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
Chris@0 526 ObjectBuilder::getInstance()->registerClass
Chris@0 527 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
Chris@0 528 ObjectBuilder::getInstance()->registerClass
Chris@0 529 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
Chris@0 530 ObjectBuilder::getInstance()->registerClass
Chris@0 531 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
Chris@0 532 ObjectBuilder::getInstance()->registerClass
Chris@0 533 <HobokenImporter>("ClassicalData::HobokenImporter*");
Chris@0 534
Chris@0 535 ContainerBuilder::getInstance()->registerContainer
Chris@0 536 <QString, QSet<QString> >
Chris@0 537 ("QString", "QSet<QString>", ContainerBuilder::SetKind);
Chris@0 538
Chris@0 539 ContainerBuilder::getInstance()->registerContainer
Chris@0 540 <Work*, QSet<Work*> >
Chris@0 541 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>",
Chris@0 542 ContainerBuilder::SetKind);
Chris@0 543
Chris@0 544 ContainerBuilder::getInstance()->registerContainer
Chris@0 545 <Movement*, QSet<Movement*> >
Chris@0 546 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>",
Chris@0 547 ContainerBuilder::SetKind);
Chris@0 548
Chris@0 549 ContainerBuilder::getInstance()->registerContainer
Chris@0 550 <Document*, QSet<Document*> >
Chris@0 551 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>",
Chris@0 552 ContainerBuilder::SetKind);
Chris@0 553
Chris@0 554 ContainerBuilder::getInstance()->registerContainer
Chris@0 555 <Form*, QSet<Form*> >
Chris@0 556 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>",
Chris@0 557 ContainerBuilder::SetKind);
Chris@0 558
Chris@0 559 BasicStore *store = BasicStore::load("file:importers.ttl");
Chris@0 560 ObjectMapper mapper(store);
Chris@0 561 QObject *parentObject = mapper.loadAllObjects(new QObject());
Chris@0 562
Chris@0 563 BasicStore *outstore = new BasicStore();
Chris@0 564 ObjectMapper outmapper(outstore);
Chris@0 565
Chris@0 566 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
Chris@0 567
Chris@0 568 outstore->addPrefix("type", outmapper.getObjectTypePrefix());
Chris@0 569 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "ClassicalData/");
Chris@0 570 outstore->addPrefix("property", outmapper.getPropertyPrefix());
Chris@0 571 outstore->addPrefix("rel", outmapper.getRelationshipPrefix());
Chris@0 572 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/");
Chris@0 573 outstore->addPrefix("mo", "http://purl.org/ontology/mo/");
Chris@0 574 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/");
Chris@0 575 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/");
Chris@0 576 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#");
Chris@0 577 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
Chris@0 578
Chris@0 579 outmapper.addPropertyMapping("ClassicalData::Composer", "pages",
Chris@0 580 outstore->expand("foaf:page"));
Chris@0 581 outmapper.addPropertyMapping("ClassicalData::Composer", "name",
Chris@0 582 outstore->expand("foaf:name"));
Chris@0 583 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases",
Chris@0 584 outstore->expand("property:also_known_as"));
Chris@0 585 outmapper.addPropertyMapping("ClassicalData::Document", "topic",
Chris@0 586 outstore->expand("foaf:primaryTopic"));
Chris@0 587
Chris@0 588 outmapper.addTypeMapping("ClassicalData::Work",
Chris@0 589 outstore->expand("mo:MusicalWork"));
Chris@0 590 outmapper.addPropertyMapping("ClassicalData::Work", "composition",
Chris@0 591 outstore->expand("mo:composed_in"));
Chris@0 592 outmapper.addPropertyMapping("ClassicalData::Work", "opus",
Chris@0 593 outstore->expand("mo:opus"));
Chris@0 594 outmapper.addPropertyMapping("ClassicalData::Work", "k6",
Chris@0 595 outstore->expand("mo:k6"));
Chris@0 596 outmapper.addPropertyMapping("ClassicalData::Work", "bwv",
Chris@0 597 outstore->expand("mo:bwv"));
Chris@0 598 outmapper.addPropertyMapping("ClassicalData::Work", "number",
Chris@0 599 outstore->expand("mo:number"));
Chris@0 600 outmapper.addPropertyMapping("ClassicalData::Work", "partOf",
Chris@0 601 outstore->expand("dc:isPartOf"));
Chris@0 602 outmapper.addPropertyMapping("ClassicalData::Work", "parts",
Chris@0 603 outstore->expand("dc:hasPart"));
Chris@0 604 outmapper.addPropertyMapping("ClassicalData::Work", "pages",
Chris@0 605 outstore->expand("foaf:page"));
Chris@0 606 outmapper.addPropertyMapping("ClassicalData::Work", "forms",
Chris@0 607 outstore->expand("property:form"));
Chris@0 608 outmapper.addPropertyMapping("ClassicalData::Work", "key",
Chris@0 609 outstore->expand("mo:key"));
Chris@0 610 outmapper.addPropertyMapping("ClassicalData::Work", "aliases",
Chris@0 611 outstore->expand("property:also_known_as"));
Chris@0 612 outmapper.addPropertyMapping("ClassicalData::Work", "name",
Chris@0 613 outstore->expand("dc:title"));
Chris@0 614
Chris@0 615 outmapper.addTypeMapping("ClassicalData::Composition",
Chris@0 616 outstore->expand("mo:Composition"));
Chris@0 617 outmapper.addPropertyMapping("ClassicalData::Composition", "composer",
Chris@0 618 outstore->expand("mo:composer"));
Chris@0 619 outmapper.addPropertyMapping("ClassicalData::Composition", "works",
Chris@0 620 outstore->expand("mo:produced_work"));
Chris@0 621
Chris@0 622 outstore->add(Triple("classical:Composer", "a",
Chris@0 623 outstore->expand("owl:Class")));
Chris@0 624 outstore->add(Triple("classical:Composer", "rdfs:subClassOf",
Chris@0 625 outstore->expand("mo:MusicArtist")));
Chris@0 626
Chris@0 627 QList<Importer *> importers = parentObject->findChildren<Importer *>();
Chris@0 628 std::cerr << "have " << importers.size() << " importers" << std::endl;
Chris@0 629
Chris@0 630 ComposerMap composers;
Chris@0 631
Chris@0 632 QList<Composer *> dated;
Chris@0 633 QList<Composer *> undated;
Chris@0 634
Chris@0 635 QList<Work *> works;
Chris@0 636 QList<Composition *> compositions;
Chris@0 637 QList<QObject *> other;
Chris@0 638
Chris@0 639 foreach (Importer *importer, importers) {
Chris@0 640 QObjectList objects = importer->getImportedObjects();
Chris@0 641 foreach (QObject *o, objects) {
Chris@0 642 Composer *c;
Chris@0 643 if ((c = qobject_cast<Composer *>(o))) {
Chris@0 644 addMiscExpansions(c);
Chris@0 645 asciify(c);
Chris@0 646 if (c->birth() || c->death()) dated.push_back(c);
Chris@0 647 else undated.push_back(c);
Chris@0 648 continue;
Chris@0 649 }
Chris@0 650 Work *w;
Chris@0 651 if ((w = qobject_cast<Work *>(o))) {
Chris@0 652 asciify(w);
Chris@0 653 works.push_back(w);
Chris@0 654 continue;
Chris@0 655 }
Chris@0 656 Composition *cn;
Chris@0 657 if ((cn = qobject_cast<Composition *>(o))) {
Chris@0 658 compositions.push_back(cn);
Chris@0 659 continue;
Chris@0 660 }
Chris@0 661 }
Chris@0 662 }
Chris@0 663
Chris@0 664 // get all the dated composers merged before attempting to match
Chris@0 665 // the undated ones
Chris@0 666 foreach (Composer *c, dated) {
Chris@0 667 mergeComposer(c, composers);
Chris@0 668 }
Chris@0 669 foreach (Composer *c, undated) {
Chris@0 670 mergeComposer(c, composers);
Chris@0 671 }
Chris@0 672
Chris@0 673 QObjectList toStore;
Chris@0 674
Chris@0 675 QSet<Composer *> cset;
Chris@0 676 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
Chris@0 677 foreach (Composer *c, i.value()) {
Chris@0 678 if (!cset.contains(c)) {
Chris@0 679 assignUri(outstore, c);
Chris@0 680 toStore.push_back(c);
Chris@0 681 cset.insert(c);
Chris@0 682 }
Chris@0 683 foreach (Document *d, c->pages()) {
Chris@0 684 QString s = d->uri().toString();
Chris@0 685 addDbpediaResource(outstore, c, s);
Chris@0 686 }
Chris@0 687 }
Chris@0 688 }
Chris@0 689
Chris@0 690 QSet<QString> storedUris;
Chris@0 691
Chris@0 692 foreach (Work *w, works) {
Chris@0 693 Composition *cn = w->composition();
Chris@0 694 if (!cn) continue;
Chris@0 695 if (!cn->composer()) {
Chris@0 696 QString cname = cn->composerName();
Chris@0 697 if (cname != "") {
Chris@0 698 if (!composers.contains(cname.toLower())) {
Chris@0 699 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 700 } else {
Chris@0 701 QSet<Composer *> cs = composers[cname.toLower()];
Chris@0 702 if (cs.empty()) {
Chris@0 703 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
Chris@0 704 } else if (cs.size() > 1) {
Chris@0 705 DEBUG << "Failed to assign Composition to composer: "
Chris@0 706 << cs.size() << " composers match name " << cname << endl;
Chris@0 707 } else {
Chris@0 708 cn->setComposer(*cs.begin());
Chris@0 709 }
Chris@0 710 }
Chris@0 711 } else {
Chris@0 712 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
Chris@0 713 }
Chris@0 714 }
Chris@0 715
Chris@0 716 if (cn->composer()) {
Chris@0 717 assignUri(outstore, w, cn->composer());
Chris@0 718 }
Chris@0 719
Chris@0 720 foreach (Document *d, w->pages()) {
Chris@0 721 QString s = d->uri().toString();
Chris@0 722 addDbpediaResource(outstore, w, s);
Chris@0 723 toStore.push_back(d);
Chris@0 724 }
Chris@0 725
Chris@0 726 QString u = w->property("uri").toUrl().toString();
Chris@0 727 if (u == "" || !storedUris.contains(u)) {
Chris@0 728 toStore.push_back(w);
Chris@0 729 if (u != "") storedUris.insert(u);
Chris@0 730 }
Chris@0 731 }
Chris@0 732
Chris@0 733 try {
Chris@0 734 outmapper.storeAllObjects(toStore);
Chris@0 735
Chris@0 736 } catch (RDFException e) {
Chris@0 737 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
Chris@0 738 }
Chris@0 739
Chris@0 740 DEBUG << "Stored, now saving" << endl;
Chris@0 741
Chris@0 742 outstore->save("test-out.ttl");
Chris@0 743
Chris@0 744 DEBUG << "Saved" << endl;
Chris@0 745
Chris@0 746
Chris@0 747 QMultiMap<QString, Composer *> cmap;
Chris@0 748 foreach (Composer *c, cset) {
Chris@0 749 QString n = c->getSortName(true);
Chris@0 750 cmap.insert(n, c);
Chris@0 751 }
Chris@0 752
Chris@0 753 std::cout << "Composers: " << cmap.size() << std::endl;
Chris@0 754
Chris@0 755 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 756 i != cmap.end(); ++i) {
Chris@0 757
Chris@0 758 QString n = i.key();
Chris@0 759 Composer *c = i.value();
Chris@0 760
Chris@0 761 std::cout << n.toStdString();
Chris@0 762
Chris@0 763 QString d = c->getDisplayDates();
Chris@0 764 if (d != "") std::cout << " (" << d.toStdString() << ")";
Chris@0 765 std::cout << std::endl;
Chris@0 766 }
Chris@0 767
Chris@0 768 std::cout << std::endl;
Chris@0 769
Chris@0 770 std::cout << "Works by composer:" << std::endl;
Chris@0 771
Chris@0 772 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
Chris@0 773 i != cmap.end(); ++i) {
Chris@0 774
Chris@0 775 QString n = i.key();
Chris@0 776 Composer *c = i.value();
Chris@0 777
Chris@0 778 std::set<Work *, Work::Ordering> wmap;
Chris@0 779 foreach (Work *w, works) {
Chris@0 780 Composition *cn = w->composition();
Chris@0 781 if (!cn) continue;
Chris@0 782 if (cn->composer() != c) continue;
Chris@0 783 if (w->partOf()) continue;
Chris@0 784 wmap.insert(w);
Chris@0 785 }
Chris@0 786
Chris@0 787 if (wmap.empty()) continue;
Chris@0 788
Chris@0 789 std::cout << n.toStdString() << std::endl;
Chris@0 790
Chris@0 791 foreach (Work *w, wmap) {
Chris@0 792 std::cout << " * ";
Chris@0 793 std::cout << w->name().toStdString();
Chris@0 794 if (w->catalogue() != "") {
Chris@0 795 std::cout << " [" << w->catalogue().toStdString() << "]";
Chris@0 796 }
Chris@0 797 if (w->opus() != "") {
Chris@0 798 std::cout << " [op. " << w->opus().toStdString() << "]";
Chris@0 799 }
Chris@0 800 std::cout << std::endl;
Chris@0 801 std::set<Work *, Work::Ordering> orderedParts;
Chris@0 802 foreach (Work *ww, w->parts()) {
Chris@0 803 orderedParts.insert(ww);
Chris@0 804 }
Chris@0 805 foreach (Work *ww, orderedParts) {
Chris@0 806 std::cout << " ";
Chris@0 807 if (ww->number() != "") {
Chris@0 808 std::cout << ww->number().toStdString() << ". ";
Chris@0 809 }
Chris@0 810 std::cout << ww->name().toStdString();
Chris@0 811 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
Chris@0 812 std::cout << " [" << ww->catalogue().toStdString() << "]";
Chris@0 813 }
Chris@0 814 if (ww->opus() != "" && ww->opus() != w->opus()) {
Chris@0 815 std::cout << " [op. " << ww->opus().toStdString() << "]";
Chris@0 816 }
Chris@0 817 std::cout << std::endl;
Chris@0 818 }
Chris@0 819 }
Chris@0 820
Chris@0 821 std::cout << std::endl;
Chris@0 822 }
Chris@0 823
Chris@0 824 delete outstore;
Chris@0 825
Chris@0 826 DEBUG << "Done" << endl;
Chris@0 827
Chris@0 828
Chris@0 829 }
Chris@0 830
Chris@0 831