comparison import/Test.cpp @ 0:e8f4c2b55fd8 classical-rdf

* reorganise
author Chris Cannam
date Tue, 01 Dec 2009 17:50:41 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e8f4c2b55fd8
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
2
3 #include "Objects.h"
4
5 #include <dataquay/BasicStore.h>
6 #include <dataquay/RDFException.h>
7 #include <dataquay/objectmapper/ObjectMapper.h>
8 #include <dataquay/objectmapper/ObjectBuilder.h>
9 #include <dataquay/objectmapper/ContainerBuilder.h>
10
11 #include "ImportClassicalComposersOrg.h"
12 #include "ImportClassicalDotNet.h"
13 #include "ImportWikipediaComposers.h"
14 #include "ImportWikipediaWorks.h"
15 #include "ImportWikipediaWorksK.h"
16 #include "ImportWikipediaWorksList.h"
17 #include "ImportHoboken.h"
18
19 #include <dataquay/Debug.h>
20
21 using namespace ClassicalData;
22 using namespace Dataquay;
23
24 #include <iostream>
25 #include <set>
26
27 typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers
28
29 bool datesMatch(Composer *a, Composer *b)
30 {
31 if (a->birth() && b->birth()) {
32 if (abs(a->birth()->year() - b->birth()->year()) > 1) {
33 if ((!a->birth()->approximate() && !b->birth()->approximate()) ||
34 (abs(a->birth()->year() - b->birth()->year()) > 10)) {
35 return false;
36 }
37 }
38 }
39 if (a->death() && b->death()) {
40 if (abs(a->death()->year() - b->death()->year()) > 1) {
41 if ((!a->death()->approximate() && !b->death()->approximate()) ||
42 (abs(a->death()->year() - b->death()->year()) > 10)) {
43 return false;
44 }
45 }
46 }
47 return true;
48 }
49
50 void
51 addMiscExpansions(Composer *c)
52 {
53 QString n = c->name();
54
55 DEBUG << "addMiscExpansions: n = " << n << endl;
56
57 // lovely hard-coded special cases go here! some of these are
58 // needed for works->composer assignments
59 if (n == "Balakirev, Milii") {
60 c->addAlias("Mily Balakirev");
61 }
62 if (n.startsWith("Cui, C")) {
63 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
64 }
65 if (n == "Handel, George Frideric") {
66 c->addAlias("Handel, Georg Friedrich");
67 c->addAlias("Handel");
68 }
69 if (n == "Mayr, Simon") {
70 c->addAlias("Mayr");
71 }
72
73 n.replace(", Sr.", " Sr.");
74 n.replace(", Jr.", " Jr.");
75
76 int comma = n.indexOf(", ");
77 if (comma > 0 && comma + 2 < n.length()) {
78
79 QString left = n.left(comma);
80 QString right = n.right(n.length() - comma - 2);
81
82 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
83 if (jrsr.indexIn(right) >= 0) {
84 left = left + jrsr.cap(1);
85 right = right.left(right.length()-jrsr.matchedLength());
86 }
87 n = right + " " + left;
88 }
89
90 if (n != c->name()) c->addAlias(n);
91
92 if (n.contains("Sergey")) {
93 QString nn(n);
94 nn.replace("Sergey", "Sergei");
95 c->addAlias(nn);
96 }
97
98 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
99 if (sr.indexIn(n) >= 0) {
100 QString nr = n;
101 nr.replace(sr.pos(0), sr.matchedLength(), " I");
102 nr.replace(" ", " ");
103 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
104 c->addAlias(nr);
105 }
106 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
107 if (jr.indexIn(n) >= 0) {
108 QString nr = n;
109 nr.replace(jr.pos(0), jr.matchedLength(), " II");
110 nr.replace(" ", " ");
111 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
112 c->addAlias(nr);
113 }
114 QString nr = n;
115 nr.replace("(I)", "I");
116 nr.replace("(II)", "II");
117 nr.replace("(III)", "III");
118 c->addAlias(nr);
119 }
120
121 bool namesFuzzyMatch(QString an, Composer *b)
122 {
123 // ew!
124
125 QString bn = b->name();
126 if (bn == an) return true;
127 if (b->aliases().contains(an)) return true;
128 int aSurnameIndex = 0, bSurnameIndex = 0;
129 if (an.contains(",")) {
130 an.replace(",", "");
131 } else {
132 aSurnameIndex = -1;
133 }
134 if (bn.contains(",")) {
135 bn.replace(",", "");
136 } else {
137 bSurnameIndex = -1;
138 }
139 QStringList nl = an.split(QRegExp("[ -]"));
140 QStringList bnl = bn.split(QRegExp("[ -]"));
141 int matchCount = 0;
142 QString surnameMatch = "";
143 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
144 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
145 if (nl[aSurnameIndex][0].isUpper() &&
146 nl[aSurnameIndex] != "Della" &&
147 nl[aSurnameIndex] == bnl[bSurnameIndex]) {
148 surnameMatch = nl[aSurnameIndex];
149 }
150 foreach (QString elt, nl) {
151 if (!elt[0].isUpper() || elt == "Della") continue;
152 if (bnl.contains(elt)) {
153 ++matchCount;
154 continue;
155 }
156 }
157 if (matchCount > 1 && surnameMatch != "") {
158 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
159 return true;
160 }
161 return false;
162 }
163
164 bool
165 hasBetterName(Composer *c, Composer *other)
166 {
167 if (c->name() == other->name()) return false;
168
169 // Try to guess which of c and other is more likely to have a good
170 // "canonical form" of the composer's name
171
172 if (c->name().startsWith("van ")) {
173 return false; // wrong choice of sort for e.g. LvB; should be
174 // Beethoven, Ludwig van, not van Beethoven, Ludwig
175 }
176 if (other->name().startsWith("van ")) {
177 return true;
178 }
179
180 if (c->aliases().size() != other->aliases().size()) {
181 // a rather weak heuristic
182 return c->aliases().size() > other->aliases().size();
183 }
184
185 if (c->name().contains(',') && !other->name().contains(',')) {
186 // another rather weak heuristic
187 return true;
188 }
189
190 return false;
191 }
192
193 void mergeComposer(Composer *c, ComposerMap &composers)
194 {
195 QString name = c->name();
196
197 QSet<QString> allNames = c->aliases();
198 allNames.insert(name);
199
200 QString dates;
201 if (c->birth()) {
202 if (c->death()) {
203 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
204 } else {
205 dates = QString("%1-").arg(c->birth()->year());
206 }
207 }
208 if (dates != "") {
209 allNames.insert(dates);
210 }
211
212 QSet<Composer *> matches;
213
214 foreach (QString candidateName, allNames) {
215 QString key = candidateName.toLower();
216 if (composers.contains(key)) {
217 foreach (Composer *candidate, composers[key]) {
218 if (candidateName == dates) {
219 if (!namesFuzzyMatch(c->name(), candidate) &&
220 !namesFuzzyMatch(candidate->name(), c)) {
221 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
222 continue;
223 } else {
224 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
225 }
226 } else {
227 if (!datesMatch(c, candidate)) {
228 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
229 continue;
230 }
231 }
232 matches.insert(candidate);
233 }
234 }
235 }
236
237 if (matches.empty()) {
238 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
239
240 if (!c->birth() && !c->death()) {
241 // laboriously look for fuzzy match across _all_ composers
242 for (ComposerMap::iterator i = composers.begin();
243 i != composers.end(); ++i) {
244 foreach (Composer *candidate, *i) {
245 if (namesFuzzyMatch(c->name(), candidate)) {
246 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
247 matches.insert(candidate);
248 break;
249 }
250 }
251 if (!matches.empty()) break;
252 }
253 }
254
255 if (matches.empty()) {
256 foreach (QString candidateName, allNames) {
257 composers[candidateName.toLower()].insert(c);
258 DEBUG << "added for alias or date " << candidateName << endl;
259 }
260 return;
261 }
262 }
263
264 if (matches.size() > 1) {
265 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
266 }
267
268 Composer *other = *matches.begin();
269
270 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
271
272 if (hasBetterName(c, other)) {
273 other->addAlias(other->name());
274 other->setName(c->name());
275 } else {
276 other->addAlias(c->name());
277 }
278 composers[c->name().toLower()].insert(other);
279 DEBUG << "linking from alias " << c->name() << endl;
280
281 foreach (QString alias, c->aliases()) {
282 if (alias != other->name() &&
283 !other->aliases().contains(alias)) {
284 other->addAlias(alias);
285 composers[alias.toLower()].insert(other);
286 DEBUG << "linking from alias " << alias << endl;
287 }
288 }
289
290 foreach (Document *d, c->pages()) {
291 bool found = false;
292 foreach (Document *dd, other->pages()) {
293 if (d->uri() == dd->uri()) {
294 found = true;
295 break;
296 }
297 }
298 if (!found) {
299 d->setTopic(other);
300 other->addPage(d);
301 }
302 }
303
304 //!!! actually the "approximate" bits of the following are bogus;
305 // a source reporting birth or death date as approx is probably
306 // more accurate than one reporting an exact date
307
308 if (c->birth()) {
309 if (!other->birth() || other->birth()->approximate()) {
310 other->setBirth(c->birth());
311 }
312 }
313
314 if (c->death()) {
315 if (!other->death() || other->death()->approximate()) {
316 other->setDeath(c->death());
317 }
318 }
319
320 if (c->gender() != "") other->setGender(c->gender());
321 if (c->nationality() != "") other->setNationality(c->nationality());
322 if (c->remarks() != "") other->setRemarks(c->remarks());
323 if (c->period() != "") other->setPeriod(c->period());
324
325 }
326
327 QString
328 asciify(QString field)
329 {
330 // accented characters etc -- add "ascii version" for dumb search purposes
331 QString ascii;
332 for (int i = 0; i < field.length(); ++i) {
333 QString dc = field[i].decomposition();
334 if (dc != "") ascii += dc[0];
335 else if (field[i] == QChar(0x00DF)) {
336 ascii += "ss";
337 } else {
338 ascii += field[i];
339 }
340 }
341 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
342 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
343 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
344 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
345 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
346 return ascii;
347 }
348
349 void
350 asciify(Composer *c)
351 {
352 QString n = c->name();
353 QString asc = asciify(n);
354 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
355 foreach (QString alias, c->aliases()) {
356 asc = asciify(alias);
357 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
358 }
359 }
360
361 void
362 asciify(Work *w)
363 {
364 QString n = w->name();
365 QString asc = asciify(n);
366 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
367 foreach (QString alias, w->aliases()) {
368 asc = asciify(alias);
369 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
370 }
371 }
372
373 void
374 assignUri(Store *s, Composer *c)
375 {
376 static QSet<QString> convSet;
377 QString conv = c->name();
378 if (!conv.contains(",")) {
379 QStringList sl = conv.split(" ");
380 if (!sl.empty()) {
381 sl.push_front(sl[sl.size()-1]);
382 sl.removeLast();
383 conv = sl.join(" ");
384 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
385 }
386 }
387 conv = asciify(conv);
388 conv.replace(" ", "_");
389 conv.replace("-", "_");
390 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
391 conv = conv.toLower();
392 QString initial = conv;
393 int i = 1;
394 while (convSet.contains(conv)) {
395 conv = QString("%1__%2").arg(initial).arg(i);
396 i++;
397 }
398 convSet.insert(conv);
399 c->setProperty("uri", s->expand(":composer_" + conv));
400 }
401
402 void
403 assignUri(Store *s, Work *w, Composer *c)
404 {
405 QString pfx = c->property("uri").toUrl().toString();
406 DEBUG << "pfx = " << pfx << endl;
407 if (!pfx.contains("composer_")) pfx = "";
408 else pfx.replace(QRegExp("^.*composer_"), "");
409
410 static QSet<QString> convSet;
411 QString conv = w->catalogue();
412 if (conv == "") conv = w->opus();
413 conv = conv.replace(".", "");
414 bool hasOpus = (conv != "");
415 if (conv == "") conv = w->name();
416 if (w->number() != "") conv = conv + "_no" + w->number();
417 if (pfx != "") conv = pfx + "_" + conv;
418 conv = asciify(conv);
419 conv.replace(" ", "_");
420 conv.replace("-", "_");
421 conv.replace(":", "_");
422 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
423 conv = conv.toLower();
424 // I think actually for works we want to merge duplicates rather than
425 // assign them separate URIs, _unless_ they lack a viable opus number
426 if (!hasOpus) {
427 QString initial = conv;
428 int i = 1;
429 while (convSet.contains(conv)) {
430 conv = QString("%1__%2").arg(initial).arg(i);
431 i++;
432 }
433 }
434 convSet.insert(conv);
435 w->setProperty("uri", s->expand(":work_" + conv));
436 }
437
438 void
439 addDbpediaResource(Store *store, QObject *o, QString s)
440 {
441 QUrl u = o->property("uri").toUrl();
442 if (u == QUrl()) return;
443 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
444 store->add(Triple(u,
445 "mo:wikipedia",
446 QUrl(s)));
447 s.replace("http://en.wikipedia.org/wiki/",
448 "http://dbpedia.org/resource/");
449 store->add(Triple(u,
450 "owl:sameAs",
451 QUrl(s)));
452 }
453 }
454
455 int main(int argc, char **argv)
456 {
457 qRegisterMetaType<HistoricalEvent *>
458 ("ClassicalData::HistoricalEvent*");
459 qRegisterMetaType<Birth *>
460 ("ClassicalData::Birth*");
461 qRegisterMetaType<Death *>
462 ("ClassicalData::Death*");
463 qRegisterMetaType<Composition *>
464 ("ClassicalData::Composition*");
465 qRegisterMetaType<Work *>
466 ("ClassicalData::Work*");
467 qRegisterMetaType<Movement *>
468 ("ClassicalData::Movement*");
469 qRegisterMetaType<Composer *>
470 ("ClassicalData::Composer*");
471 qRegisterMetaType<Document *>
472 ("ClassicalData::Document*");
473 qRegisterMetaType<Form *>
474 ("ClassicalData::Form*");
475 qRegisterMetaType<QSet<Work *> >
476 ("QSet<ClassicalData::Work*>");
477 qRegisterMetaType<QSet<Movement *> >
478 ("QSet<ClassicalData::Movement*>");
479 qRegisterMetaType<QSet<Document *> >
480 ("QSet<ClassicalData::Document*>");
481 qRegisterMetaType<QSet<Form *> >
482 ("QSet<ClassicalData::Form*>");
483 qRegisterMetaType<QSet<QString> >
484 ("QSet<QString>");
485
486 qRegisterMetaType<ClassicalComposersOrgImporter *>
487 ("ClassicalData::ClassicalComposersOrgImporter*");
488 qRegisterMetaType<ClassicalDotNetImporter *>
489 ("ClassicalData::ClassicalDotNetImporter*");
490 qRegisterMetaType<WikipediaComposersImporter *>
491 ("ClassicalData::WikipediaComposersImporter*");
492 qRegisterMetaType<WikipediaWorksImporter *>
493 ("ClassicalData::WikipediaWorksImporter*");
494 qRegisterMetaType<WikipediaWorksKImporter *>
495 ("ClassicalData::WikipediaWorksKImporter*");
496 qRegisterMetaType<WikipediaWorksListImporter *>
497 ("ClassicalData::WikipediaWorksListImporter*");
498 qRegisterMetaType<HobokenImporter *>
499 ("ClassicalData::HobokenImporter*");
500
501 ObjectBuilder::getInstance()->registerClass
502 <HistoricalEvent>("ClassicalData::HistoricalEvent*");
503 ObjectBuilder::getInstance()->registerClass
504 <Birth>("ClassicalData::Birth*");
505 ObjectBuilder::getInstance()->registerClass
506 <Death>("ClassicalData::Death*");
507 ObjectBuilder::getInstance()->registerClass
508 <Composition>("ClassicalData::Composition*");
509 ObjectBuilder::getInstance()->registerClass
510 <Work, QObject>("ClassicalData::Work*");
511 ObjectBuilder::getInstance()->registerClass
512 <Movement, QObject>("ClassicalData::Movement*");
513 ObjectBuilder::getInstance()->registerClass
514 <Composer, QObject>("ClassicalData::Composer*");
515 ObjectBuilder::getInstance()->registerClass
516 <Document, QObject>("ClassicalData::Document*");
517 ObjectBuilder::getInstance()->registerClass
518 <Form, QObject>("ClassicalData::Form*");
519
520 ObjectBuilder::getInstance()->registerClass
521 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
522 ObjectBuilder::getInstance()->registerClass
523 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
524 ObjectBuilder::getInstance()->registerClass
525 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
526 ObjectBuilder::getInstance()->registerClass
527 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
528 ObjectBuilder::getInstance()->registerClass
529 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
530 ObjectBuilder::getInstance()->registerClass
531 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
532 ObjectBuilder::getInstance()->registerClass
533 <HobokenImporter>("ClassicalData::HobokenImporter*");
534
535 ContainerBuilder::getInstance()->registerContainer
536 <QString, QSet<QString> >
537 ("QString", "QSet<QString>", ContainerBuilder::SetKind);
538
539 ContainerBuilder::getInstance()->registerContainer
540 <Work*, QSet<Work*> >
541 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>",
542 ContainerBuilder::SetKind);
543
544 ContainerBuilder::getInstance()->registerContainer
545 <Movement*, QSet<Movement*> >
546 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>",
547 ContainerBuilder::SetKind);
548
549 ContainerBuilder::getInstance()->registerContainer
550 <Document*, QSet<Document*> >
551 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>",
552 ContainerBuilder::SetKind);
553
554 ContainerBuilder::getInstance()->registerContainer
555 <Form*, QSet<Form*> >
556 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>",
557 ContainerBuilder::SetKind);
558
559 BasicStore *store = BasicStore::load("file:importers.ttl");
560 ObjectMapper mapper(store);
561 QObject *parentObject = mapper.loadAllObjects(new QObject());
562
563 BasicStore *outstore = new BasicStore();
564 ObjectMapper outmapper(outstore);
565
566 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged);
567
568 outstore->addPrefix("type", outmapper.getObjectTypePrefix());
569 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "ClassicalData/");
570 outstore->addPrefix("property", outmapper.getPropertyPrefix());
571 outstore->addPrefix("rel", outmapper.getRelationshipPrefix());
572 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/");
573 outstore->addPrefix("mo", "http://purl.org/ontology/mo/");
574 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/");
575 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/");
576 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#");
577 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#");
578
579 outmapper.addPropertyMapping("ClassicalData::Composer", "pages",
580 outstore->expand("foaf:page"));
581 outmapper.addPropertyMapping("ClassicalData::Composer", "name",
582 outstore->expand("foaf:name"));
583 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases",
584 outstore->expand("property:also_known_as"));
585 outmapper.addPropertyMapping("ClassicalData::Document", "topic",
586 outstore->expand("foaf:primaryTopic"));
587
588 outmapper.addTypeMapping("ClassicalData::Work",
589 outstore->expand("mo:MusicalWork"));
590 outmapper.addPropertyMapping("ClassicalData::Work", "composition",
591 outstore->expand("mo:composed_in"));
592 outmapper.addPropertyMapping("ClassicalData::Work", "opus",
593 outstore->expand("mo:opus"));
594 outmapper.addPropertyMapping("ClassicalData::Work", "k6",
595 outstore->expand("mo:k6"));
596 outmapper.addPropertyMapping("ClassicalData::Work", "bwv",
597 outstore->expand("mo:bwv"));
598 outmapper.addPropertyMapping("ClassicalData::Work", "number",
599 outstore->expand("mo:number"));
600 outmapper.addPropertyMapping("ClassicalData::Work", "partOf",
601 outstore->expand("dc:isPartOf"));
602 outmapper.addPropertyMapping("ClassicalData::Work", "parts",
603 outstore->expand("dc:hasPart"));
604 outmapper.addPropertyMapping("ClassicalData::Work", "pages",
605 outstore->expand("foaf:page"));
606 outmapper.addPropertyMapping("ClassicalData::Work", "forms",
607 outstore->expand("property:form"));
608 outmapper.addPropertyMapping("ClassicalData::Work", "key",
609 outstore->expand("mo:key"));
610 outmapper.addPropertyMapping("ClassicalData::Work", "aliases",
611 outstore->expand("property:also_known_as"));
612 outmapper.addPropertyMapping("ClassicalData::Work", "name",
613 outstore->expand("dc:title"));
614
615 outmapper.addTypeMapping("ClassicalData::Composition",
616 outstore->expand("mo:Composition"));
617 outmapper.addPropertyMapping("ClassicalData::Composition", "composer",
618 outstore->expand("mo:composer"));
619 outmapper.addPropertyMapping("ClassicalData::Composition", "works",
620 outstore->expand("mo:produced_work"));
621
622 outstore->add(Triple("classical:Composer", "a",
623 outstore->expand("owl:Class")));
624 outstore->add(Triple("classical:Composer", "rdfs:subClassOf",
625 outstore->expand("mo:MusicArtist")));
626
627 QList<Importer *> importers = parentObject->findChildren<Importer *>();
628 std::cerr << "have " << importers.size() << " importers" << std::endl;
629
630 ComposerMap composers;
631
632 QList<Composer *> dated;
633 QList<Composer *> undated;
634
635 QList<Work *> works;
636 QList<Composition *> compositions;
637 QList<QObject *> other;
638
639 foreach (Importer *importer, importers) {
640 QObjectList objects = importer->getImportedObjects();
641 foreach (QObject *o, objects) {
642 Composer *c;
643 if ((c = qobject_cast<Composer *>(o))) {
644 addMiscExpansions(c);
645 asciify(c);
646 if (c->birth() || c->death()) dated.push_back(c);
647 else undated.push_back(c);
648 continue;
649 }
650 Work *w;
651 if ((w = qobject_cast<Work *>(o))) {
652 asciify(w);
653 works.push_back(w);
654 continue;
655 }
656 Composition *cn;
657 if ((cn = qobject_cast<Composition *>(o))) {
658 compositions.push_back(cn);
659 continue;
660 }
661 }
662 }
663
664 // get all the dated composers merged before attempting to match
665 // the undated ones
666 foreach (Composer *c, dated) {
667 mergeComposer(c, composers);
668 }
669 foreach (Composer *c, undated) {
670 mergeComposer(c, composers);
671 }
672
673 QObjectList toStore;
674
675 QSet<Composer *> cset;
676 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
677 foreach (Composer *c, i.value()) {
678 if (!cset.contains(c)) {
679 assignUri(outstore, c);
680 toStore.push_back(c);
681 cset.insert(c);
682 }
683 foreach (Document *d, c->pages()) {
684 QString s = d->uri().toString();
685 addDbpediaResource(outstore, c, s);
686 }
687 }
688 }
689
690 QSet<QString> storedUris;
691
692 foreach (Work *w, works) {
693 Composition *cn = w->composition();
694 if (!cn) continue;
695 if (!cn->composer()) {
696 QString cname = cn->composerName();
697 if (cname != "") {
698 if (!composers.contains(cname.toLower())) {
699 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
700 } else {
701 QSet<Composer *> cs = composers[cname.toLower()];
702 if (cs.empty()) {
703 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
704 } else if (cs.size() > 1) {
705 DEBUG << "Failed to assign Composition to composer: "
706 << cs.size() << " composers match name " << cname << endl;
707 } else {
708 cn->setComposer(*cs.begin());
709 }
710 }
711 } else {
712 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
713 }
714 }
715
716 if (cn->composer()) {
717 assignUri(outstore, w, cn->composer());
718 }
719
720 foreach (Document *d, w->pages()) {
721 QString s = d->uri().toString();
722 addDbpediaResource(outstore, w, s);
723 toStore.push_back(d);
724 }
725
726 QString u = w->property("uri").toUrl().toString();
727 if (u == "" || !storedUris.contains(u)) {
728 toStore.push_back(w);
729 if (u != "") storedUris.insert(u);
730 }
731 }
732
733 try {
734 outmapper.storeAllObjects(toStore);
735
736 } catch (RDFException e) {
737 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
738 }
739
740 DEBUG << "Stored, now saving" << endl;
741
742 outstore->save("test-out.ttl");
743
744 DEBUG << "Saved" << endl;
745
746
747 QMultiMap<QString, Composer *> cmap;
748 foreach (Composer *c, cset) {
749 QString n = c->getSortName(true);
750 cmap.insert(n, c);
751 }
752
753 std::cout << "Composers: " << cmap.size() << std::endl;
754
755 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
756 i != cmap.end(); ++i) {
757
758 QString n = i.key();
759 Composer *c = i.value();
760
761 std::cout << n.toStdString();
762
763 QString d = c->getDisplayDates();
764 if (d != "") std::cout << " (" << d.toStdString() << ")";
765 std::cout << std::endl;
766 }
767
768 std::cout << std::endl;
769
770 std::cout << "Works by composer:" << std::endl;
771
772 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
773 i != cmap.end(); ++i) {
774
775 QString n = i.key();
776 Composer *c = i.value();
777
778 std::set<Work *, Work::Ordering> wmap;
779 foreach (Work *w, works) {
780 Composition *cn = w->composition();
781 if (!cn) continue;
782 if (cn->composer() != c) continue;
783 if (w->partOf()) continue;
784 wmap.insert(w);
785 }
786
787 if (wmap.empty()) continue;
788
789 std::cout << n.toStdString() << std::endl;
790
791 foreach (Work *w, wmap) {
792 std::cout << " * ";
793 std::cout << w->name().toStdString();
794 if (w->catalogue() != "") {
795 std::cout << " [" << w->catalogue().toStdString() << "]";
796 }
797 if (w->opus() != "") {
798 std::cout << " [op. " << w->opus().toStdString() << "]";
799 }
800 std::cout << std::endl;
801 std::set<Work *, Work::Ordering> orderedParts;
802 foreach (Work *ww, w->parts()) {
803 orderedParts.insert(ww);
804 }
805 foreach (Work *ww, orderedParts) {
806 std::cout << " ";
807 if (ww->number() != "") {
808 std::cout << ww->number().toStdString() << ". ";
809 }
810 std::cout << ww->name().toStdString();
811 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
812 std::cout << " [" << ww->catalogue().toStdString() << "]";
813 }
814 if (ww->opus() != "" && ww->opus() != w->opus()) {
815 std::cout << " [op. " << ww->opus().toStdString() << "]";
816 }
817 std::cout << std::endl;
818 }
819 }
820
821 std::cout << std::endl;
822 }
823
824 delete outstore;
825
826 DEBUG << "Done" << endl;
827
828
829 }
830
831