comparison import/Import.cpp @ 1:29ca5974905d classical-rdf

* More work on a nice tidy import; get some sensible URIs etc
author Chris Cannam
date Thu, 03 Dec 2009 15:42:10 +0000
parents import/Test.cpp@e8f4c2b55fd8
children ff067a1e7e3d
comparison
equal deleted inserted replaced
0:e8f4c2b55fd8 1:29ca5974905d
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 #include "Objects.h"
4
5 #include <dataquay/BasicStore.h>
6 #include <dataquay/RDFException.h>
7 #include <dataquay/objectmapper/ObjectMapper.h>
8 #include <dataquay/objectmapper/ObjectBuilder.h>
9 #include <dataquay/objectmapper/ContainerBuilder.h>
10
11 #include "ImportClassicalComposersOrg.h"
12 #include "ImportClassicalDotNet.h"
13 #include "ImportWikipediaComposers.h"
14 #include "ImportWikipediaWorks.h"
15 #include "ImportWikipediaWorksK.h"
16 #include "ImportWikipediaWorksList.h"
17 #include "ImportHoboken.h"
18
19 #include <dataquay/Debug.h>
20
21 using namespace ClassicalData;
22 using namespace Dataquay;
23
24 #include <iostream>
25 #include <set>
26
27 typedef QMap<QString, QSet<Composer *> > ComposerMap; // lower-cased name, alias or date string ("birth-death" / "birth-") -> composers registered under that key
28
29 void
30 addMiscExpansions(Composer *c)
31 {
32 QString n = c->name();
33
34 DEBUG << "addMiscExpansions: n = " << n << endl;
35
36 // lovely hard-coded special cases go here! some of these are
37 // needed for works->composer assignments
38 if (n == "Balakirev, Milii") {
39 c->addAlias("Mily Balakirev");
40 }
41 if (n.startsWith("Cui, C")) {
42 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
43 }
44 if (n == "Handel, George Frideric") {
45 c->addAlias("Handel, Georg Friedrich");
46 c->addAlias("Handel");
47 }
48 if (n == "Prokofiev, Sergey") {
49 c->addAlias("Prokofieff, Sergei");
50 c->addAlias("Sergei Prokofieff");
51 }
52 if (n == "Rossini, Gioacchino") {
53 c->addAlias("Rossini, Gioachino");
54 c->addAlias("Gioachino Rossini");
55 }
56 if (n == "Edwards, Richard") {
57 c->addAlias("Edwardes, Richard");
58 c->addAlias("Richard Edwardes");
59 c->addAlias("Richard Edwards");
60 }
61 if (n == "Rimsky-Korsakov, Nikolay Andreyevich") {
62 c->addAlias("Nikolai Rimsky-Korsakov");
63 }
64 if (n.startsWith("Piccinni, Nico")) {
65 c->addAlias(n);
66 c->setName(QString::fromUtf8("Piccinni, Niccol\303\262"));
67 }
68 if (n == "Tchaikovsky, Pyotr Ilyich") {
69 c->addAlias("Tchaikovsky, Piotr Ilyitch");
70 }
71 if (n == "Wilhelm Stenhammar") {
72 c->addAlias("Stenhammar, Vilhelm Eugene");
73 c->setName("Stenhammar, Wilhelm");
74 c->addAlias(n);
75 }
76 if (n == "Mercadante, Saverio Rafaele") {
77 c->addAlias("Mercadante, Giuseppe");
78 }
79 if (n == "Johann Wenzel Anton Stamitz") {
80 c->addAlias(n);
81 c->setName("Stamitz, Johann Wenzel Anton");
82 c->addAlias("Stamitz, Jan Vaclav");
83 }
84 if (n == "Mario Castelnuovo-Tedesco") {
85 c->addAlias("Castelnuovo Tedesco, Mario");
86 }
87 if (n == "Mayr, Simon") {
88 c->addAlias("Mayr");
89 }
90
91 n.replace(", Sr.", " Sr.");
92 n.replace(", Jr.", " Jr.");
93
94 int comma = n.indexOf(", ");
95 if (comma > 0 && comma + 2 < n.length()) {
96
97 QString left = n.left(comma);
98 QString right = n.right(n.length() - comma - 2);
99
100 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
101 if (jrsr.indexIn(right) >= 0) {
102 left = left + jrsr.cap(1);
103 right = right.left(right.length()-jrsr.matchedLength());
104 }
105 n = right + " " + left;
106 }
107
108 if (n != c->name()) c->addAlias(n);
109
110 if (n.contains("Sergey")) {
111 QString nn(n);
112 nn.replace("Sergey", "Sergei");
113 c->addAlias(nn);
114 } else if (n.contains("Sergei")) {
115 QString nn(n);
116 nn.replace("Sergei", "Sergey");
117 c->addAlias(nn);
118 }
119
120 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
121 if (sr.indexIn(n) >= 0) {
122 QString nr = n;
123 nr.replace(sr.pos(0), sr.matchedLength(), " I");
124 nr.replace(" ", " ");
125 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
126 c->addAlias(nr);
127 }
128 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
129 if (jr.indexIn(n) >= 0) {
130 QString nr = n;
131 nr.replace(jr.pos(0), jr.matchedLength(), " II");
132 nr.replace(" ", " ");
133 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
134 c->addAlias(nr);
135 }
136 QString nr = n;
137 nr.replace("(I)", "I");
138 nr.replace("(II)", "II");
139 nr.replace("(III)", "III");
140 c->addAlias(nr);
141 }
142
143 bool namesFuzzyMatch(QString an, Composer *b)
144 {
145 // ew!
146
147 QString bn = b->name();
148 if (bn == an) return true;
149 if (b->aliases().contains(an)) return true;
150 int aSurnameIndex = 0, bSurnameIndex = 0;
151 if (an.contains(",")) {
152 an.replace(",", "");
153 } else {
154 aSurnameIndex = -1;
155 }
156 if (bn.contains(",")) {
157 bn.replace(",", "");
158 } else {
159 bSurnameIndex = -1;
160 }
161 QStringList nl = an.split(QRegExp("[ -]"));
162 QStringList bnl = bn.split(QRegExp("[ -]"));
163 int matchCount = 0;
164 QString surnameMatch = "";
165 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
166 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
167 if (nl[aSurnameIndex][0].isUpper() &&
168 nl[aSurnameIndex] != "Della" &&
169 nl[aSurnameIndex] == bnl[bSurnameIndex]) {
170 surnameMatch = nl[aSurnameIndex];
171 }
172 foreach (QString elt, nl) {
173 if (!elt[0].isUpper() || elt == "Della") continue;
174 if (bnl.contains(elt)) {
175 ++matchCount;
176 continue;
177 }
178 }
179 if (matchCount > 1 && surnameMatch != "") {
180 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
181 return true;
182 }
183 return false;
184 }
185
186 bool
187 hasBetterName(Composer *c, Composer *other)
188 {
189 if (c->name() == other->name()) return false;
190
191 // Try to guess which of c and other is more likely to have a good
192 // "canonical form" of the composer's name
193
194 if (c->name().startsWith("van ")) {
195 return false; // wrong choice of sort for e.g. LvB; should be
196 // Beethoven, Ludwig van, not van Beethoven, Ludwig
197 }
198 if (other->name().startsWith("van ")) {
199 return true;
200 }
201
202 if (c->aliases().size() != other->aliases().size()) {
203 // a rather weak heuristic
204 return c->aliases().size() > other->aliases().size();
205 }
206
207 if (c->name().contains(',') && !other->name().contains(',')) {
208 // another rather weak heuristic
209 return true;
210 }
211
212 return false;
213 }
214
215 void mergeComposer(Composer *c, ComposerMap &composers)
216 {
217 QString name = c->name();
218
219 QSet<QString> allNames = c->aliases();
220 allNames.insert(name);
221
222 QString dates;
223 if (c->birth()) {
224 if (c->death()) {
225 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
226 } else {
227 dates = QString("%1-").arg(c->birth()->year());
228 }
229 }
230 if (dates != "") {
231 allNames.insert(dates);
232 }
233
234 QSet<Composer *> matches;
235
236 foreach (QString candidateName, allNames) {
237 QString key = candidateName.toLower();
238 if (composers.contains(key)) {
239 foreach (Composer *candidate, composers[key]) {
240 if (candidateName == dates) {
241 if (!namesFuzzyMatch(c->name(), candidate) &&
242 !namesFuzzyMatch(candidate->name(), c)) {
243 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
244 continue;
245 } else {
246 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
247 }
248 } else {
249 if (!c->datesMatch(candidate)) {
250 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
251 continue;
252 }
253 }
254 matches.insert(candidate);
255 }
256 }
257 }
258
259 if (matches.empty()) {
260 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
261
262 if (!c->birth() && !c->death()) {
263 // laboriously look for fuzzy match across _all_ composers
264 for (ComposerMap::iterator i = composers.begin();
265 i != composers.end(); ++i) {
266 foreach (Composer *candidate, *i) {
267 if (namesFuzzyMatch(c->name(), candidate)) {
268 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
269 matches.insert(candidate);
270 break;
271 }
272 }
273 if (!matches.empty()) break;
274 }
275 }
276
277 if (matches.empty()) {
278 foreach (QString candidateName, allNames) {
279 composers[candidateName.toLower()].insert(c);
280 DEBUG << "added for alias or date " << candidateName << endl;
281 }
282 return;
283 }
284 }
285
286 if (matches.size() > 1) {
287 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
288 }
289
290 Composer *other = *matches.begin();
291
292 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
293
294 if (hasBetterName(c, other)) {
295 other->addAlias(other->name());
296 other->setName(c->name());
297 } else {
298 other->addAlias(c->name());
299 }
300 composers[c->name().toLower()].insert(other);
301 DEBUG << "linking from alias " << c->name() << endl;
302
303 foreach (QString alias, c->aliases()) {
304 if (alias != other->name() &&
305 !other->aliases().contains(alias)) {
306 other->addAlias(alias);
307 composers[alias.toLower()].insert(other);
308 DEBUG << "linking from alias " << alias << endl;
309 }
310 }
311
312 foreach (Document *d, c->pages()) {
313 bool found = false;
314 foreach (Document *dd, other->pages()) {
315 if (d->uri() == dd->uri()) {
316 found = true;
317 break;
318 }
319 }
320 if (!found) {
321 d->setTopic(other);
322 other->addPage(d);
323 }
324 }
325
326 //!!! actually the "approximate" bits of the following are bogus;
327 // a source reporting birth or death date as approx is probably
328 // more accurate than one reporting an exact date
329
330 if (c->birth()) {
331 if (!other->birth() || other->birth()->approximate()) {
332 other->setBirth(c->birth());
333 }
334 }
335
336 if (c->death()) {
337 if (!other->death() || other->death()->approximate()) {
338 other->setDeath(c->death());
339 }
340 }
341
342 if (c->gender() != "") other->setGender(c->gender());
343 if (c->nationality() != "") other->setNationality(c->nationality());
344 if (c->remarks() != "") other->setRemarks(c->remarks());
345 if (c->period() != "") other->setPeriod(c->period());
346
347 }
348
349 QString
350 asciify(QString field)
351 {
352 // accented characters etc -- add "ascii version" for dumb search purposes
353 QString ascii;
354 for (int i = 0; i < field.length(); ++i) {
355 QString dc = field[i].decomposition();
356 if (dc != "") ascii += dc[0];
357 else if (field[i] == QChar(0x00DF)) {
358 ascii += "ss";
359 } else {
360 ascii += field[i];
361 }
362 }
363 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
364 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
365 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
366 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
367 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
368 return ascii;
369 }
370
371 void
372 asciify(Composer *c)
373 {
374 QString n = c->name();
375 QString asc = asciify(n);
376 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
377 foreach (QString alias, c->aliases()) {
378 asc = asciify(alias);
379 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
380 }
381 }
382
383 void
384 asciify(Work *w)
385 {
386 QString n = w->name();
387 QString asc = asciify(n);
388 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
389 foreach (QString alias, w->aliases()) {
390 asc = asciify(alias);
391 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
392 }
393 }
394
395 void
396 assignUri(Store *s, Composer *c)
397 {
398 static QSet<QString> convSet;
399 QString conv = c->name();
400 if (!conv.contains(",")) {
401 QStringList sl = conv.split(" ");
402 if (!sl.empty()) {
403 sl.push_front(sl[sl.size()-1]);
404 sl.removeLast();
405 conv = sl.join(" ");
406 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
407 }
408 }
409 conv = asciify(conv);
410 conv.replace(" ", "_");
411 conv.replace("-", "_");
412 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
413 conv = conv.toLower();
414 QString initial = conv;
415 int i = 2;
416 while (convSet.contains(conv)) {
417 conv = QString("%1__%2").arg(initial).arg(i);
418 i++;
419 }
420 convSet.insert(conv);
421 c->setProperty("uri", s->expand(":composer/" + conv));
422 }
423
424 void
425 assignUri(Store *s, Work *w, Composer *c)
426 {
427 QString pfx = c->property("uri").toUrl().toString();
428 DEBUG << "pfx = " << pfx << endl;
429 if (!pfx.contains("composer/")) pfx = "";
430
431 static QSet<QString> convSet;
432
433 QString conv = w->catalogue();
434 if (conv == "") conv = w->opus();
435 conv = conv.replace(".", "");
436 bool hasOpus = (conv != "");
437 if (conv == "") conv = w->name().toLower();
438 if (w->number() != "") conv = conv + "_no" + w->number();
439 conv = asciify(conv);
440 conv.replace(" ", "_");
441 conv.replace("-", "_");
442 conv.replace(":", "_");
443 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
444
445 if (pfx != "") conv = pfx + "/work/" + conv;
446
447 // I think actually for works we want to merge duplicates rather than
448 // assign them separate URIs, _unless_ they lack a viable opus number
449 if (!hasOpus) {
450 QString initial = conv;
451 int i = 2;
452 while (convSet.contains(conv)) {
453 conv = QString("%1__%2").arg(initial).arg(i);
454 i++;
455 }
456 }
457 convSet.insert(conv);
458
459 w->setProperty("uri", conv);
460 }
461
462 void
463 addDbpediaResource(Store *store, QObject *o, QString s)
464 {
465 QUrl u = o->property("uri").toUrl();
466 if (u == QUrl()) return;
467 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
468 store->add(Triple(u,
469 "mo:wikipedia",
470 QUrl(s)));
471 s.replace("http://en.wikipedia.org/wiki/",
472 "http://dbpedia.org/resource/");
473 store->add(Triple(u,
474 "owl:sameAs",
475 QUrl(s)));
476 }
477 }
478
479 int main(int argc, char **argv)
480 {
481 qRegisterMetaType<HistoricalEvent *>
482 ("ClassicalData::HistoricalEvent*");
483 qRegisterMetaType<Birth *>
484 ("ClassicalData::Birth*");
485 qRegisterMetaType<Death *>
486 ("ClassicalData::Death*");
487 qRegisterMetaType<Composition *>
488 ("ClassicalData::Composition*");
489 qRegisterMetaType<Work *>
490 ("ClassicalData::Work*");
491 qRegisterMetaType<Movement *>
492 ("ClassicalData::Movement*");
493 qRegisterMetaType<Composer *>
494 ("ClassicalData::Composer*");
495 qRegisterMetaType<Document *>
496 ("ClassicalData::Document*");
497 qRegisterMetaType<Form *>
498 ("ClassicalData::Form*");
499 qRegisterMetaType<QSet<Work *> >
500 ("QSet<ClassicalData::Work*>");
501 qRegisterMetaType<QSet<Movement *> >
502 ("QSet<ClassicalData::Movement*>");
503 qRegisterMetaType<QSet<Document *> >
504 ("QSet<ClassicalData::Document*>");
505 qRegisterMetaType<QSet<Form *> >
506 ("QSet<ClassicalData::Form*>");
507 qRegisterMetaType<QSet<QString> >
508 ("QSet<QString>");
509
510 qRegisterMetaType<ClassicalComposersOrgImporter *>
511 ("ClassicalData::ClassicalComposersOrgImporter*");
512 qRegisterMetaType<ClassicalDotNetImporter *>
513 ("ClassicalData::ClassicalDotNetImporter*");
514 qRegisterMetaType<WikipediaComposersImporter *>
515 ("ClassicalData::WikipediaComposersImporter*");
516 qRegisterMetaType<WikipediaWorksImporter *>
517 ("ClassicalData::WikipediaWorksImporter*");
518 qRegisterMetaType<WikipediaWorksKImporter *>
519 ("ClassicalData::WikipediaWorksKImporter*");
520 qRegisterMetaType<WikipediaWorksListImporter *>
521 ("ClassicalData::WikipediaWorksListImporter*");
522 qRegisterMetaType<HobokenImporter *>
523 ("ClassicalData::HobokenImporter*");
524
525 ObjectBuilder::getInstance()->registerClass
526 <HistoricalEvent>("ClassicalData::HistoricalEvent*");
527 ObjectBuilder::getInstance()->registerClass
528 <Birth>("ClassicalData::Birth*");
529 ObjectBuilder::getInstance()->registerClass
530 <Death>("ClassicalData::Death*");
531 ObjectBuilder::getInstance()->registerClass
532 <Composition>("ClassicalData::Composition*");
533 ObjectBuilder::getInstance()->registerClass
534 <Work, QObject>("ClassicalData::Work*");
535 ObjectBuilder::getInstance()->registerClass
536 <Movement, QObject>("ClassicalData::Movement*");
537 ObjectBuilder::getInstance()->registerClass
538 <Composer, QObject>("ClassicalData::Composer*");
539 ObjectBuilder::getInstance()->registerClass
540 <Document, QObject>("ClassicalData::Document*");
541 ObjectBuilder::getInstance()->registerClass
542 <Form, QObject>("ClassicalData::Form*");
543
544 ObjectBuilder::getInstance()->registerClass
545 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
546 ObjectBuilder::getInstance()->registerClass
547 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
548 ObjectBuilder::getInstance()->registerClass
549 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
550 ObjectBuilder::getInstance()->registerClass
551 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
552 ObjectBuilder::getInstance()->registerClass
553 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
554 ObjectBuilder::getInstance()->registerClass
555 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
556 ObjectBuilder::getInstance()->registerClass
557 <HobokenImporter>("ClassicalData::HobokenImporter*");
558
559 ContainerBuilder::getInstance()->registerContainer
560 <QString, QSet<QString> >
561 ("QString", "QSet<QString>", ContainerBuilder::SetKind);
562
563 ContainerBuilder::getInstance()->registerContainer
564 <Work*, QSet<Work*> >
565 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>",
566 ContainerBuilder::SetKind);
567
568 ContainerBuilder::getInstance()->registerContainer
569 <Movement*, QSet<Movement*> >
570 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>",
571 ContainerBuilder::SetKind);
572
573 ContainerBuilder::getInstance()->registerContainer
574 <Document*, QSet<Document*> >
575 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>",
576 ContainerBuilder::SetKind);
577
578 ContainerBuilder::getInstance()->registerContainer
579 <Form*, QSet<Form*> >
580 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>",
581 ContainerBuilder::SetKind);
582
583 BasicStore *store = BasicStore::load("file:importers.ttl");
584 ObjectMapper mapper(store);
585 QObject *parentObject = mapper.loadAllObjects(new QObject());
586
587 BasicStore *outstore = new BasicStore();
588 outstore->setBaseUri("http://dbtune.org/classical/resource/");
589 ObjectMapper outmapper(outstore);
590
591 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
592
593 outmapper.setObjectTypePrefix("http://dbtune.org/classical/resource/");
594 outmapper.setPropertyPrefix("http://dbtune.org/classical/resource/vocab/");
595 outmapper.setRelationshipPrefix("http://dbtune.org/classical/resource/vocab/relationship/");
596
597 outstore->addPrefix("type", outmapper.getObjectTypePrefix());
598 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "type/");
599 outstore->addPrefix("property", outmapper.getPropertyPrefix());
600 outstore->addPrefix("rel", outmapper.getRelationshipPrefix());
601
602 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/");
603 outstore->addPrefix("mo", "http://purl.org/ontology/mo/");
604 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/");
605 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/");
606 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#");
607 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
608 outstore->addPrefix("db", "http://dbtune.org/musicbrainz/resource/");
609 outstore->addPrefix("dbv", "http://dbtune.org/musicbrainz/resource/vocab/");
610
611 outmapper.addTypeMapping("ClassicalData::Composer", "classical:Composer");
612 outmapper.addPropertyMapping("ClassicalData::Composer", "pages", "foaf:page");
613 outmapper.addPropertyMapping("ClassicalData::Composer", "name", "foaf:name");
614 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", "dbv:alias");
615 outmapper.addPropertyMapping("ClassicalData::Composer", "birth", "property:birth");
616 outmapper.addPropertyMapping("ClassicalData::Composer", "death", "property:death");
617
618 outmapper.addTypeMapping("ClassicalData::Birth", "bio:Birth");
619 outmapper.addTypeMapping("ClassicalData::Death", "bio:Death");
620 outmapper.addPropertyMapping("ClassicalData::Birth", "year", "bio:date");
621 outmapper.addPropertyMapping("ClassicalData::Death", "year", "bio:date");
622 outmapper.addPropertyMapping("ClassicalData::Birth", "place", "bio:place");
623 outmapper.addPropertyMapping("ClassicalData::Death", "place", "bio:place");
624
625 outmapper.addTypeMapping("ClassicalData::Document", "foaf:Document");
626 outmapper.addPropertyMapping("ClassicalData::Document", "topic", "foaf:primaryTopic");
627
628 outmapper.addTypeMapping("ClassicalData::Work", "mo:MusicalWork");
629
630 outmapper.addPropertyMapping("ClassicalData::Work", "composition", "mo:composed_in");
631 outmapper.addPropertyMapping("ClassicalData::Work", "opus", "mo:opus");
632 outmapper.addPropertyMapping("ClassicalData::Work", "catalogue", "mo:catalogue");
633 outmapper.addPropertyMapping("ClassicalData::Work", "number", "mo:number");
634 outmapper.addPropertyMapping("ClassicalData::Work", "partOf", "dc:isPartOf");
635 outmapper.addPropertyMapping("ClassicalData::Work", "parts", "dc:hasPart");
636 outmapper.addPropertyMapping("ClassicalData::Work", "pages", "foaf:page");
637 outmapper.addPropertyMapping("ClassicalData::Work", "forms", "property:form");
638 outmapper.addPropertyMapping("ClassicalData::Work", "key", "mo:key");
639 outmapper.addPropertyMapping("ClassicalData::Work", "aliases", "dbv:alias");
640 outmapper.addPropertyMapping("ClassicalData::Work", "name", "dc:title");
641
642 outmapper.addTypeMapping("ClassicalData::Composition", "mo:Composition");
643 outmapper.addPropertyMapping("ClassicalData::Composition", "composer", "mo:composer");
644 outmapper.addPropertyMapping("ClassicalData::Composition", "works", "mo:produced_work");
645
646 outstore->add(Triple("classical:Composer", "a", outstore->expand("owl:Class")));
647 outstore->add(Triple("classical:Composer", "rdfs:subClassOf", outstore->expand("mo:MusicArtist")));
648
649 outstore->add(Triple("property:birth", "a", outstore->expand("owl:ObjectProperty")));
650 outstore->add(Triple("property:birth", "rdfs:subPropertyOf", outstore->expand("bio:event")));
651
652 outstore->add(Triple("property:death", "a", outstore->expand("owl:ObjectProperty")));
653 outstore->add(Triple("property:death", "rdfs:subPropertyOf", outstore->expand("bio:event")));
654
655 QList<Importer *> importers = parentObject->findChildren<Importer *>();
656 std::cerr << "have " << importers.size() << " importers" << std::endl;
657
658 ComposerMap composers;
659
660 QList<Composer *> dated;
661 QList<Composer *> undated;
662
663 QList<Work *> works;
664 QList<Composition *> compositions;
665 QList<QObject *> other;
666
667 foreach (Importer *importer, importers) {
668 QObjectList objects = importer->getImportedObjects();
669 foreach (QObject *o, objects) {
670 Composer *c;
671 if ((c = qobject_cast<Composer *>(o))) {
672 addMiscExpansions(c);
673 asciify(c);
674 if (c->birth() || c->death()) dated.push_back(c);
675 else undated.push_back(c);
676 continue;
677 }
678 Work *w;
679 if ((w = qobject_cast<Work *>(o))) {
680 asciify(w);
681 works.push_back(w);
682 continue;
683 }
684 Composition *cn;
685 if ((cn = qobject_cast<Composition *>(o))) {
686 compositions.push_back(cn);
687 continue;
688 }
689 }
690 }
691
692 // get all the dated composers merged before attempting to match
693 // the undated ones
694 foreach (Composer *c, dated) {
695 mergeComposer(c, composers);
696 }
697 foreach (Composer *c, undated) {
698 mergeComposer(c, composers);
699 }
700
701 QObjectList toStore;
702
703 QSet<Composer *> cset;
704 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
705 foreach (Composer *c, i.value()) {
706 if (!cset.contains(c)) {
707 assignUri(outstore, c);
708 toStore.push_back(c);
709 cset.insert(c);
710 }
711 foreach (Document *d, c->pages()) {
712 QString s = d->uri().toString();
713 addDbpediaResource(outstore, c, s);
714 }
715 }
716 }
717
718 QSet<QString> storedUris;
719
720 foreach (Work *w, works) {
721 Composition *cn = w->composition();
722 if (!cn) continue;
723 if (!cn->composer()) {
724 QString cname = cn->composerName();
725 if (cname != "") {
726 if (!composers.contains(cname.toLower())) {
727 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
728 } else {
729 QSet<Composer *> cs = composers[cname.toLower()];
730 if (cs.empty()) {
731 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
732 } else if (cs.size() > 1) {
733 DEBUG << "Failed to assign Composition to composer: "
734 << cs.size() << " composers match name " << cname << endl;
735 } else {
736 cn->setComposer(*cs.begin());
737 }
738 }
739 } else {
740 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
741 }
742 }
743
744 if (cn->composer()) {
745 assignUri(outstore, w, cn->composer());
746 }
747
748 foreach (Document *d, w->pages()) {
749 QString s = d->uri().toString();
750 addDbpediaResource(outstore, w, s);
751 if (!storedUris.contains(s)) {
752 toStore.push_back(d);
753 storedUris.insert(s);
754 }
755 }
756
757 QString u = w->property("uri").toUrl().toString();
758 if (u == "" || !storedUris.contains(u)) {
759 toStore.push_back(w);
760 if (u != "") storedUris.insert(u);
761 }
762 }
763
764 try {
765 outmapper.storeAllObjects(toStore);
766
767 } catch (RDFException e) {
768 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
769 }
770
771 DEBUG << "Stored, now saving" << endl;
772
773 outstore->save("test-out.ttl");
774
775 DEBUG << "Saved" << endl;
776
777
778 QMultiMap<QString, Composer *> cmap;
779 foreach (Composer *c, cset) {
780 QString n = c->getSortName(true);
781 cmap.insert(n, c);
782 }
783
784 std::cout << "Composers: " << cmap.size() << std::endl;
785
786 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
787 i != cmap.end(); ++i) {
788
789 QString n = i.key();
790 Composer *c = i.value();
791
792 std::cout << n.toStdString();
793
794 QString d = c->getDisplayDates();
795 if (d != "") std::cout << " (" << d.toStdString() << ")";
796 std::cout << std::endl;
797 }
798
799 std::cout << std::endl;
800
801 std::cout << "Works by composer:" << std::endl;
802
803 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
804 i != cmap.end(); ++i) {
805
806 QString n = i.key();
807 Composer *c = i.value();
808
809 std::set<Work *, Work::Ordering> wmap;
810 foreach (Work *w, works) {
811 Composition *cn = w->composition();
812 if (!cn) continue;
813 if (cn->composer() != c) continue;
814 if (w->partOf()) continue;
815 wmap.insert(w);
816 }
817
818 if (wmap.empty()) continue;
819
820 std::cout << n.toStdString() << std::endl;
821
822 foreach (Work *w, wmap) {
823 std::cout << " * ";
824 std::cout << w->name().toStdString();
825 if (w->catalogue() != "") {
826 std::cout << " [" << w->catalogue().toStdString() << "]";
827 }
828 if (w->opus() != "") {
829 std::cout << " [op. " << w->opus().toStdString() << "]";
830 }
831 std::cout << std::endl;
832 std::set<Work *, Work::Ordering> orderedParts;
833 foreach (Work *ww, w->parts()) {
834 orderedParts.insert(ww);
835 }
836 foreach (Work *ww, orderedParts) {
837 std::cout << " ";
838 if (ww->number() != "") {
839 std::cout << ww->number().toStdString() << ". ";
840 }
841 std::cout << ww->name().toStdString();
842 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
843 std::cout << " [" << ww->catalogue().toStdString() << "]";
844 }
845 if (ww->opus() != "" && ww->opus() != w->opus()) {
846 std::cout << " [op. " << ww->opus().toStdString() << "]";
847 }
848 std::cout << std::endl;
849 }
850 }
851
852 std::cout << std::endl;
853 }
854
855 delete outstore;
856
857 DEBUG << "Done" << endl;
858
859
860 }
861
862