Mercurial > hg > classical
comparison import/Test.cpp @ 0:e8f4c2b55fd8 classical-rdf
* reorganise
author | Chris Cannam |
---|---|
date | Tue, 01 Dec 2009 17:50:41 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e8f4c2b55fd8 |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 #include "Objects.h" | |
4 | |
5 #include <dataquay/BasicStore.h> | |
6 #include <dataquay/RDFException.h> | |
7 #include <dataquay/objectmapper/ObjectMapper.h> | |
8 #include <dataquay/objectmapper/ObjectBuilder.h> | |
9 #include <dataquay/objectmapper/ContainerBuilder.h> | |
10 | |
11 #include "ImportClassicalComposersOrg.h" | |
12 #include "ImportClassicalDotNet.h" | |
13 #include "ImportWikipediaComposers.h" | |
14 #include "ImportWikipediaWorks.h" | |
15 #include "ImportWikipediaWorksK.h" | |
16 #include "ImportWikipediaWorksList.h" | |
17 #include "ImportHoboken.h" | |
18 | |
19 #include <dataquay/Debug.h> | |
20 | |
21 using namespace ClassicalData; | |
22 using namespace Dataquay; | |
23 | |
24 #include <iostream> | |
25 #include <set> | |
26 | |
27 typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers | |
28 | |
29 bool datesMatch(Composer *a, Composer *b) | |
30 { | |
31 if (a->birth() && b->birth()) { | |
32 if (abs(a->birth()->year() - b->birth()->year()) > 1) { | |
33 if ((!a->birth()->approximate() && !b->birth()->approximate()) || | |
34 (abs(a->birth()->year() - b->birth()->year()) > 10)) { | |
35 return false; | |
36 } | |
37 } | |
38 } | |
39 if (a->death() && b->death()) { | |
40 if (abs(a->death()->year() - b->death()->year()) > 1) { | |
41 if ((!a->death()->approximate() && !b->death()->approximate()) || | |
42 (abs(a->death()->year() - b->death()->year()) > 10)) { | |
43 return false; | |
44 } | |
45 } | |
46 } | |
47 return true; | |
48 } | |
49 | |
50 void | |
51 addMiscExpansions(Composer *c) | |
52 { | |
53 QString n = c->name(); | |
54 | |
55 DEBUG << "addMiscExpansions: n = " << n << endl; | |
56 | |
57 // lovely hard-coded special cases go here! some of these are | |
58 // needed for works->composer assignments | |
59 if (n == "Balakirev, Milii") { | |
60 c->addAlias("Mily Balakirev"); | |
61 } | |
62 if (n.startsWith("Cui, C")) { | |
63 c->addAlias(QString::fromUtf8("C\303\251sar Cui")); | |
64 } | |
65 if (n == "Handel, George Frideric") { | |
66 c->addAlias("Handel, Georg Friedrich"); | |
67 c->addAlias("Handel"); | |
68 } | |
69 if (n == "Mayr, Simon") { | |
70 c->addAlias("Mayr"); | |
71 } | |
72 | |
73 n.replace(", Sr.", " Sr."); | |
74 n.replace(", Jr.", " Jr."); | |
75 | |
76 int comma = n.indexOf(", "); | |
77 if (comma > 0 && comma + 2 < n.length()) { | |
78 | |
79 QString left = n.left(comma); | |
80 QString right = n.right(n.length() - comma - 2); | |
81 | |
82 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$"); | |
83 if (jrsr.indexIn(right) >= 0) { | |
84 left = left + jrsr.cap(1); | |
85 right = right.left(right.length()-jrsr.matchedLength()); | |
86 } | |
87 n = right + " " + left; | |
88 } | |
89 | |
90 if (n != c->name()) c->addAlias(n); | |
91 | |
92 if (n.contains("Sergey")) { | |
93 QString nn(n); | |
94 nn.replace("Sergey", "Sergei"); | |
95 c->addAlias(nn); | |
96 } | |
97 | |
98 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive); | |
99 if (sr.indexIn(n) >= 0) { | |
100 QString nr = n; | |
101 nr.replace(sr.pos(0), sr.matchedLength(), " I"); | |
102 nr.replace(" ", " "); | |
103 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; | |
104 c->addAlias(nr); | |
105 } | |
106 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive); | |
107 if (jr.indexIn(n) >= 0) { | |
108 QString nr = n; | |
109 nr.replace(jr.pos(0), jr.matchedLength(), " II"); | |
110 nr.replace(" ", " "); | |
111 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; | |
112 c->addAlias(nr); | |
113 } | |
114 QString nr = n; | |
115 nr.replace("(I)", "I"); | |
116 nr.replace("(II)", "II"); | |
117 nr.replace("(III)", "III"); | |
118 c->addAlias(nr); | |
119 } | |
120 | |
121 bool namesFuzzyMatch(QString an, Composer *b) | |
122 { | |
123 // ew! | |
124 | |
125 QString bn = b->name(); | |
126 if (bn == an) return true; | |
127 if (b->aliases().contains(an)) return true; | |
128 int aSurnameIndex = 0, bSurnameIndex = 0; | |
129 if (an.contains(",")) { | |
130 an.replace(",", ""); | |
131 } else { | |
132 aSurnameIndex = -1; | |
133 } | |
134 if (bn.contains(",")) { | |
135 bn.replace(",", ""); | |
136 } else { | |
137 bSurnameIndex = -1; | |
138 } | |
139 QStringList nl = an.split(QRegExp("[ -]")); | |
140 QStringList bnl = bn.split(QRegExp("[ -]")); | |
141 int matchCount = 0; | |
142 QString surnameMatch = ""; | |
143 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; | |
144 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; | |
145 if (nl[aSurnameIndex][0].isUpper() && | |
146 nl[aSurnameIndex] != "Della" && | |
147 nl[aSurnameIndex] == bnl[bSurnameIndex]) { | |
148 surnameMatch = nl[aSurnameIndex]; | |
149 } | |
150 foreach (QString elt, nl) { | |
151 if (!elt[0].isUpper() || elt == "Della") continue; | |
152 if (bnl.contains(elt)) { | |
153 ++matchCount; | |
154 continue; | |
155 } | |
156 } | |
157 if (matchCount > 1 && surnameMatch != "") { | |
158 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; | |
159 return true; | |
160 } | |
161 return false; | |
162 } | |
163 | |
164 bool | |
165 hasBetterName(Composer *c, Composer *other) | |
166 { | |
167 if (c->name() == other->name()) return false; | |
168 | |
169 // Try to guess which of c and other is more likely to have a good | |
170 // "canonical form" of the composer's name | |
171 | |
172 if (c->name().startsWith("van ")) { | |
173 return false; // wrong choice of sort for e.g. LvB; should be | |
174 // Beethoven, Ludwig van, not van Beethoven, Ludwig | |
175 } | |
176 if (other->name().startsWith("van ")) { | |
177 return true; | |
178 } | |
179 | |
180 if (c->aliases().size() != other->aliases().size()) { | |
181 // a rather weak heuristic | |
182 return c->aliases().size() > other->aliases().size(); | |
183 } | |
184 | |
185 if (c->name().contains(',') && !other->name().contains(',')) { | |
186 // another rather weak heuristic | |
187 return true; | |
188 } | |
189 | |
190 return false; | |
191 } | |
192 | |
193 void mergeComposer(Composer *c, ComposerMap &composers) | |
194 { | |
195 QString name = c->name(); | |
196 | |
197 QSet<QString> allNames = c->aliases(); | |
198 allNames.insert(name); | |
199 | |
200 QString dates; | |
201 if (c->birth()) { | |
202 if (c->death()) { | |
203 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year()); | |
204 } else { | |
205 dates = QString("%1-").arg(c->birth()->year()); | |
206 } | |
207 } | |
208 if (dates != "") { | |
209 allNames.insert(dates); | |
210 } | |
211 | |
212 QSet<Composer *> matches; | |
213 | |
214 foreach (QString candidateName, allNames) { | |
215 QString key = candidateName.toLower(); | |
216 if (composers.contains(key)) { | |
217 foreach (Composer *candidate, composers[key]) { | |
218 if (candidateName == dates) { | |
219 if (!namesFuzzyMatch(c->name(), candidate) && | |
220 !namesFuzzyMatch(candidate->name(), c)) { | |
221 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; | |
222 continue; | |
223 } else { | |
224 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; | |
225 } | |
226 } else { | |
227 if (!datesMatch(c, candidate)) { | |
228 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; | |
229 continue; | |
230 } | |
231 } | |
232 matches.insert(candidate); | |
233 } | |
234 } | |
235 } | |
236 | |
237 if (matches.empty()) { | |
238 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; | |
239 | |
240 if (!c->birth() && !c->death()) { | |
241 // laboriously look for fuzzy match across _all_ composers | |
242 for (ComposerMap::iterator i = composers.begin(); | |
243 i != composers.end(); ++i) { | |
244 foreach (Composer *candidate, *i) { | |
245 if (namesFuzzyMatch(c->name(), candidate)) { | |
246 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; | |
247 matches.insert(candidate); | |
248 break; | |
249 } | |
250 } | |
251 if (!matches.empty()) break; | |
252 } | |
253 } | |
254 | |
255 if (matches.empty()) { | |
256 foreach (QString candidateName, allNames) { | |
257 composers[candidateName.toLower()].insert(c); | |
258 DEBUG << "added for alias or date " << candidateName << endl; | |
259 } | |
260 return; | |
261 } | |
262 } | |
263 | |
264 if (matches.size() > 1) { | |
265 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl; | |
266 } | |
267 | |
268 Composer *other = *matches.begin(); | |
269 | |
270 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl; | |
271 | |
272 if (hasBetterName(c, other)) { | |
273 other->addAlias(other->name()); | |
274 other->setName(c->name()); | |
275 } else { | |
276 other->addAlias(c->name()); | |
277 } | |
278 composers[c->name().toLower()].insert(other); | |
279 DEBUG << "linking from alias " << c->name() << endl; | |
280 | |
281 foreach (QString alias, c->aliases()) { | |
282 if (alias != other->name() && | |
283 !other->aliases().contains(alias)) { | |
284 other->addAlias(alias); | |
285 composers[alias.toLower()].insert(other); | |
286 DEBUG << "linking from alias " << alias << endl; | |
287 } | |
288 } | |
289 | |
290 foreach (Document *d, c->pages()) { | |
291 bool found = false; | |
292 foreach (Document *dd, other->pages()) { | |
293 if (d->uri() == dd->uri()) { | |
294 found = true; | |
295 break; | |
296 } | |
297 } | |
298 if (!found) { | |
299 d->setTopic(other); | |
300 other->addPage(d); | |
301 } | |
302 } | |
303 | |
304 //!!! actually the "approximate" bits of the following are bogus; | |
305 // a source reporting birth or death date as approx is probably | |
306 // more accurate than one reporting an exact date | |
307 | |
308 if (c->birth()) { | |
309 if (!other->birth() || other->birth()->approximate()) { | |
310 other->setBirth(c->birth()); | |
311 } | |
312 } | |
313 | |
314 if (c->death()) { | |
315 if (!other->death() || other->death()->approximate()) { | |
316 other->setDeath(c->death()); | |
317 } | |
318 } | |
319 | |
320 if (c->gender() != "") other->setGender(c->gender()); | |
321 if (c->nationality() != "") other->setNationality(c->nationality()); | |
322 if (c->remarks() != "") other->setRemarks(c->remarks()); | |
323 if (c->period() != "") other->setPeriod(c->period()); | |
324 | |
325 } | |
326 | |
327 QString | |
328 asciify(QString field) | |
329 { | |
330 // accented characters etc -- add "ascii version" for dumb search purposes | |
331 QString ascii; | |
332 for (int i = 0; i < field.length(); ++i) { | |
333 QString dc = field[i].decomposition(); | |
334 if (dc != "") ascii += dc[0]; | |
335 else if (field[i] == QChar(0x00DF)) { | |
336 ascii += "ss"; | |
337 } else { | |
338 ascii += field[i]; | |
339 } | |
340 } | |
341 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe | |
342 ascii.replace(QString::fromUtf8("\342\200\222"), "-"); | |
343 ascii.replace(QString::fromUtf8("\342\200\223"), "-"); | |
344 ascii.replace(QString::fromUtf8("\342\200\224"), "-"); | |
345 ascii.replace(QString::fromUtf8("\342\200\225"), "-"); | |
346 return ascii; | |
347 } | |
348 | |
349 void | |
350 asciify(Composer *c) | |
351 { | |
352 QString n = c->name(); | |
353 QString asc = asciify(n); | |
354 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc); | |
355 foreach (QString alias, c->aliases()) { | |
356 asc = asciify(alias); | |
357 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc); | |
358 } | |
359 } | |
360 | |
361 void | |
362 asciify(Work *w) | |
363 { | |
364 QString n = w->name(); | |
365 QString asc = asciify(n); | |
366 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc); | |
367 foreach (QString alias, w->aliases()) { | |
368 asc = asciify(alias); | |
369 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc); | |
370 } | |
371 } | |
372 | |
373 void | |
374 assignUri(Store *s, Composer *c) | |
375 { | |
376 static QSet<QString> convSet; | |
377 QString conv = c->name(); | |
378 if (!conv.contains(",")) { | |
379 QStringList sl = conv.split(" "); | |
380 if (!sl.empty()) { | |
381 sl.push_front(sl[sl.size()-1]); | |
382 sl.removeLast(); | |
383 conv = sl.join(" "); | |
384 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl; | |
385 } | |
386 } | |
387 conv = asciify(conv); | |
388 conv.replace(" ", "_"); | |
389 conv.replace("-", "_"); | |
390 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); | |
391 conv = conv.toLower(); | |
392 QString initial = conv; | |
393 int i = 1; | |
394 while (convSet.contains(conv)) { | |
395 conv = QString("%1__%2").arg(initial).arg(i); | |
396 i++; | |
397 } | |
398 convSet.insert(conv); | |
399 c->setProperty("uri", s->expand(":composer_" + conv)); | |
400 } | |
401 | |
402 void | |
403 assignUri(Store *s, Work *w, Composer *c) | |
404 { | |
405 QString pfx = c->property("uri").toUrl().toString(); | |
406 DEBUG << "pfx = " << pfx << endl; | |
407 if (!pfx.contains("composer_")) pfx = ""; | |
408 else pfx.replace(QRegExp("^.*composer_"), ""); | |
409 | |
410 static QSet<QString> convSet; | |
411 QString conv = w->catalogue(); | |
412 if (conv == "") conv = w->opus(); | |
413 conv = conv.replace(".", ""); | |
414 bool hasOpus = (conv != ""); | |
415 if (conv == "") conv = w->name(); | |
416 if (w->number() != "") conv = conv + "_no" + w->number(); | |
417 if (pfx != "") conv = pfx + "_" + conv; | |
418 conv = asciify(conv); | |
419 conv.replace(" ", "_"); | |
420 conv.replace("-", "_"); | |
421 conv.replace(":", "_"); | |
422 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); | |
423 conv = conv.toLower(); | |
424 // I think actually for works we want to merge duplicates rather than | |
425 // assign them separate URIs, _unless_ they lack a viable opus number | |
426 if (!hasOpus) { | |
427 QString initial = conv; | |
428 int i = 1; | |
429 while (convSet.contains(conv)) { | |
430 conv = QString("%1__%2").arg(initial).arg(i); | |
431 i++; | |
432 } | |
433 } | |
434 convSet.insert(conv); | |
435 w->setProperty("uri", s->expand(":work_" + conv)); | |
436 } | |
437 | |
438 void | |
439 addDbpediaResource(Store *store, QObject *o, QString s) | |
440 { | |
441 QUrl u = o->property("uri").toUrl(); | |
442 if (u == QUrl()) return; | |
443 if (s.startsWith("http://en.wikipedia.org/wiki/")) { | |
444 store->add(Triple(u, | |
445 "mo:wikipedia", | |
446 QUrl(s))); | |
447 s.replace("http://en.wikipedia.org/wiki/", | |
448 "http://dbpedia.org/resource/"); | |
449 store->add(Triple(u, | |
450 "owl:sameAs", | |
451 QUrl(s))); | |
452 } | |
453 } | |
454 | |
455 int main(int argc, char **argv) | |
456 { | |
457 qRegisterMetaType<HistoricalEvent *> | |
458 ("ClassicalData::HistoricalEvent*"); | |
459 qRegisterMetaType<Birth *> | |
460 ("ClassicalData::Birth*"); | |
461 qRegisterMetaType<Death *> | |
462 ("ClassicalData::Death*"); | |
463 qRegisterMetaType<Composition *> | |
464 ("ClassicalData::Composition*"); | |
465 qRegisterMetaType<Work *> | |
466 ("ClassicalData::Work*"); | |
467 qRegisterMetaType<Movement *> | |
468 ("ClassicalData::Movement*"); | |
469 qRegisterMetaType<Composer *> | |
470 ("ClassicalData::Composer*"); | |
471 qRegisterMetaType<Document *> | |
472 ("ClassicalData::Document*"); | |
473 qRegisterMetaType<Form *> | |
474 ("ClassicalData::Form*"); | |
475 qRegisterMetaType<QSet<Work *> > | |
476 ("QSet<ClassicalData::Work*>"); | |
477 qRegisterMetaType<QSet<Movement *> > | |
478 ("QSet<ClassicalData::Movement*>"); | |
479 qRegisterMetaType<QSet<Document *> > | |
480 ("QSet<ClassicalData::Document*>"); | |
481 qRegisterMetaType<QSet<Form *> > | |
482 ("QSet<ClassicalData::Form*>"); | |
483 qRegisterMetaType<QSet<QString> > | |
484 ("QSet<QString>"); | |
485 | |
486 qRegisterMetaType<ClassicalComposersOrgImporter *> | |
487 ("ClassicalData::ClassicalComposersOrgImporter*"); | |
488 qRegisterMetaType<ClassicalDotNetImporter *> | |
489 ("ClassicalData::ClassicalDotNetImporter*"); | |
490 qRegisterMetaType<WikipediaComposersImporter *> | |
491 ("ClassicalData::WikipediaComposersImporter*"); | |
492 qRegisterMetaType<WikipediaWorksImporter *> | |
493 ("ClassicalData::WikipediaWorksImporter*"); | |
494 qRegisterMetaType<WikipediaWorksKImporter *> | |
495 ("ClassicalData::WikipediaWorksKImporter*"); | |
496 qRegisterMetaType<WikipediaWorksListImporter *> | |
497 ("ClassicalData::WikipediaWorksListImporter*"); | |
498 qRegisterMetaType<HobokenImporter *> | |
499 ("ClassicalData::HobokenImporter*"); | |
500 | |
501 ObjectBuilder::getInstance()->registerClass | |
502 <HistoricalEvent>("ClassicalData::HistoricalEvent*"); | |
503 ObjectBuilder::getInstance()->registerClass | |
504 <Birth>("ClassicalData::Birth*"); | |
505 ObjectBuilder::getInstance()->registerClass | |
506 <Death>("ClassicalData::Death*"); | |
507 ObjectBuilder::getInstance()->registerClass | |
508 <Composition>("ClassicalData::Composition*"); | |
509 ObjectBuilder::getInstance()->registerClass | |
510 <Work, QObject>("ClassicalData::Work*"); | |
511 ObjectBuilder::getInstance()->registerClass | |
512 <Movement, QObject>("ClassicalData::Movement*"); | |
513 ObjectBuilder::getInstance()->registerClass | |
514 <Composer, QObject>("ClassicalData::Composer*"); | |
515 ObjectBuilder::getInstance()->registerClass | |
516 <Document, QObject>("ClassicalData::Document*"); | |
517 ObjectBuilder::getInstance()->registerClass | |
518 <Form, QObject>("ClassicalData::Form*"); | |
519 | |
520 ObjectBuilder::getInstance()->registerClass | |
521 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*"); | |
522 ObjectBuilder::getInstance()->registerClass | |
523 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*"); | |
524 ObjectBuilder::getInstance()->registerClass | |
525 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*"); | |
526 ObjectBuilder::getInstance()->registerClass | |
527 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*"); | |
528 ObjectBuilder::getInstance()->registerClass | |
529 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*"); | |
530 ObjectBuilder::getInstance()->registerClass | |
531 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*"); | |
532 ObjectBuilder::getInstance()->registerClass | |
533 <HobokenImporter>("ClassicalData::HobokenImporter*"); | |
534 | |
535 ContainerBuilder::getInstance()->registerContainer | |
536 <QString, QSet<QString> > | |
537 ("QString", "QSet<QString>", ContainerBuilder::SetKind); | |
538 | |
539 ContainerBuilder::getInstance()->registerContainer | |
540 <Work*, QSet<Work*> > | |
541 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>", | |
542 ContainerBuilder::SetKind); | |
543 | |
544 ContainerBuilder::getInstance()->registerContainer | |
545 <Movement*, QSet<Movement*> > | |
546 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>", | |
547 ContainerBuilder::SetKind); | |
548 | |
549 ContainerBuilder::getInstance()->registerContainer | |
550 <Document*, QSet<Document*> > | |
551 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>", | |
552 ContainerBuilder::SetKind); | |
553 | |
554 ContainerBuilder::getInstance()->registerContainer | |
555 <Form*, QSet<Form*> > | |
556 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>", | |
557 ContainerBuilder::SetKind); | |
558 | |
559 BasicStore *store = BasicStore::load("file:importers.ttl"); | |
560 ObjectMapper mapper(store); | |
561 QObject *parentObject = mapper.loadAllObjects(new QObject()); | |
562 | |
563 BasicStore *outstore = new BasicStore(); | |
564 ObjectMapper outmapper(outstore); | |
565 | |
566 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged); | |
567 | |
568 outstore->addPrefix("type", outmapper.getObjectTypePrefix()); | |
569 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "ClassicalData/"); | |
570 outstore->addPrefix("property", outmapper.getPropertyPrefix()); | |
571 outstore->addPrefix("rel", outmapper.getRelationshipPrefix()); | |
572 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/"); | |
573 outstore->addPrefix("mo", "http://purl.org/ontology/mo/"); | |
574 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/"); | |
575 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/"); | |
576 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#"); | |
577 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); | |
578 | |
579 outmapper.addPropertyMapping("ClassicalData::Composer", "pages", | |
580 outstore->expand("foaf:page")); | |
581 outmapper.addPropertyMapping("ClassicalData::Composer", "name", | |
582 outstore->expand("foaf:name")); | |
583 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", | |
584 outstore->expand("property:also_known_as")); | |
585 outmapper.addPropertyMapping("ClassicalData::Document", "topic", | |
586 outstore->expand("foaf:primaryTopic")); | |
587 | |
588 outmapper.addTypeMapping("ClassicalData::Work", | |
589 outstore->expand("mo:MusicalWork")); | |
590 outmapper.addPropertyMapping("ClassicalData::Work", "composition", | |
591 outstore->expand("mo:composed_in")); | |
592 outmapper.addPropertyMapping("ClassicalData::Work", "opus", | |
593 outstore->expand("mo:opus")); | |
594 outmapper.addPropertyMapping("ClassicalData::Work", "k6", | |
595 outstore->expand("mo:k6")); | |
596 outmapper.addPropertyMapping("ClassicalData::Work", "bwv", | |
597 outstore->expand("mo:bwv")); | |
598 outmapper.addPropertyMapping("ClassicalData::Work", "number", | |
599 outstore->expand("mo:number")); | |
600 outmapper.addPropertyMapping("ClassicalData::Work", "partOf", | |
601 outstore->expand("dc:isPartOf")); | |
602 outmapper.addPropertyMapping("ClassicalData::Work", "parts", | |
603 outstore->expand("dc:hasPart")); | |
604 outmapper.addPropertyMapping("ClassicalData::Work", "pages", | |
605 outstore->expand("foaf:page")); | |
606 outmapper.addPropertyMapping("ClassicalData::Work", "forms", | |
607 outstore->expand("property:form")); | |
608 outmapper.addPropertyMapping("ClassicalData::Work", "key", | |
609 outstore->expand("mo:key")); | |
610 outmapper.addPropertyMapping("ClassicalData::Work", "aliases", | |
611 outstore->expand("property:also_known_as")); | |
612 outmapper.addPropertyMapping("ClassicalData::Work", "name", | |
613 outstore->expand("dc:title")); | |
614 | |
615 outmapper.addTypeMapping("ClassicalData::Composition", | |
616 outstore->expand("mo:Composition")); | |
617 outmapper.addPropertyMapping("ClassicalData::Composition", "composer", | |
618 outstore->expand("mo:composer")); | |
619 outmapper.addPropertyMapping("ClassicalData::Composition", "works", | |
620 outstore->expand("mo:produced_work")); | |
621 | |
622 outstore->add(Triple("classical:Composer", "a", | |
623 outstore->expand("owl:Class"))); | |
624 outstore->add(Triple("classical:Composer", "rdfs:subClassOf", | |
625 outstore->expand("mo:MusicArtist"))); | |
626 | |
627 QList<Importer *> importers = parentObject->findChildren<Importer *>(); | |
628 std::cerr << "have " << importers.size() << " importers" << std::endl; | |
629 | |
630 ComposerMap composers; | |
631 | |
632 QList<Composer *> dated; | |
633 QList<Composer *> undated; | |
634 | |
635 QList<Work *> works; | |
636 QList<Composition *> compositions; | |
637 QList<QObject *> other; | |
638 | |
639 foreach (Importer *importer, importers) { | |
640 QObjectList objects = importer->getImportedObjects(); | |
641 foreach (QObject *o, objects) { | |
642 Composer *c; | |
643 if ((c = qobject_cast<Composer *>(o))) { | |
644 addMiscExpansions(c); | |
645 asciify(c); | |
646 if (c->birth() || c->death()) dated.push_back(c); | |
647 else undated.push_back(c); | |
648 continue; | |
649 } | |
650 Work *w; | |
651 if ((w = qobject_cast<Work *>(o))) { | |
652 asciify(w); | |
653 works.push_back(w); | |
654 continue; | |
655 } | |
656 Composition *cn; | |
657 if ((cn = qobject_cast<Composition *>(o))) { | |
658 compositions.push_back(cn); | |
659 continue; | |
660 } | |
661 } | |
662 } | |
663 | |
664 // get all the dated composers merged before attempting to match | |
665 // the undated ones | |
666 foreach (Composer *c, dated) { | |
667 mergeComposer(c, composers); | |
668 } | |
669 foreach (Composer *c, undated) { | |
670 mergeComposer(c, composers); | |
671 } | |
672 | |
673 QObjectList toStore; | |
674 | |
675 QSet<Composer *> cset; | |
676 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) { | |
677 foreach (Composer *c, i.value()) { | |
678 if (!cset.contains(c)) { | |
679 assignUri(outstore, c); | |
680 toStore.push_back(c); | |
681 cset.insert(c); | |
682 } | |
683 foreach (Document *d, c->pages()) { | |
684 QString s = d->uri().toString(); | |
685 addDbpediaResource(outstore, c, s); | |
686 } | |
687 } | |
688 } | |
689 | |
690 QSet<QString> storedUris; | |
691 | |
692 foreach (Work *w, works) { | |
693 Composition *cn = w->composition(); | |
694 if (!cn) continue; | |
695 if (!cn->composer()) { | |
696 QString cname = cn->composerName(); | |
697 if (cname != "") { | |
698 if (!composers.contains(cname.toLower())) { | |
699 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | |
700 } else { | |
701 QSet<Composer *> cs = composers[cname.toLower()]; | |
702 if (cs.empty()) { | |
703 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | |
704 } else if (cs.size() > 1) { | |
705 DEBUG << "Failed to assign Composition to composer: " | |
706 << cs.size() << " composers match name " << cname << endl; | |
707 } else { | |
708 cn->setComposer(*cs.begin()); | |
709 } | |
710 } | |
711 } else { | |
712 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl; | |
713 } | |
714 } | |
715 | |
716 if (cn->composer()) { | |
717 assignUri(outstore, w, cn->composer()); | |
718 } | |
719 | |
720 foreach (Document *d, w->pages()) { | |
721 QString s = d->uri().toString(); | |
722 addDbpediaResource(outstore, w, s); | |
723 toStore.push_back(d); | |
724 } | |
725 | |
726 QString u = w->property("uri").toUrl().toString(); | |
727 if (u == "" || !storedUris.contains(u)) { | |
728 toStore.push_back(w); | |
729 if (u != "") storedUris.insert(u); | |
730 } | |
731 } | |
732 | |
733 try { | |
734 outmapper.storeAllObjects(toStore); | |
735 | |
736 } catch (RDFException e) { | |
737 std::cerr << "Caught RDF exception: " << e.what() << std::endl; | |
738 } | |
739 | |
740 DEBUG << "Stored, now saving" << endl; | |
741 | |
742 outstore->save("test-out.ttl"); | |
743 | |
744 DEBUG << "Saved" << endl; | |
745 | |
746 | |
747 QMultiMap<QString, Composer *> cmap; | |
748 foreach (Composer *c, cset) { | |
749 QString n = c->getSortName(true); | |
750 cmap.insert(n, c); | |
751 } | |
752 | |
753 std::cout << "Composers: " << cmap.size() << std::endl; | |
754 | |
755 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); | |
756 i != cmap.end(); ++i) { | |
757 | |
758 QString n = i.key(); | |
759 Composer *c = i.value(); | |
760 | |
761 std::cout << n.toStdString(); | |
762 | |
763 QString d = c->getDisplayDates(); | |
764 if (d != "") std::cout << " (" << d.toStdString() << ")"; | |
765 std::cout << std::endl; | |
766 } | |
767 | |
768 std::cout << std::endl; | |
769 | |
770 std::cout << "Works by composer:" << std::endl; | |
771 | |
772 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); | |
773 i != cmap.end(); ++i) { | |
774 | |
775 QString n = i.key(); | |
776 Composer *c = i.value(); | |
777 | |
778 std::set<Work *, Work::Ordering> wmap; | |
779 foreach (Work *w, works) { | |
780 Composition *cn = w->composition(); | |
781 if (!cn) continue; | |
782 if (cn->composer() != c) continue; | |
783 if (w->partOf()) continue; | |
784 wmap.insert(w); | |
785 } | |
786 | |
787 if (wmap.empty()) continue; | |
788 | |
789 std::cout << n.toStdString() << std::endl; | |
790 | |
791 foreach (Work *w, wmap) { | |
792 std::cout << " * "; | |
793 std::cout << w->name().toStdString(); | |
794 if (w->catalogue() != "") { | |
795 std::cout << " [" << w->catalogue().toStdString() << "]"; | |
796 } | |
797 if (w->opus() != "") { | |
798 std::cout << " [op. " << w->opus().toStdString() << "]"; | |
799 } | |
800 std::cout << std::endl; | |
801 std::set<Work *, Work::Ordering> orderedParts; | |
802 foreach (Work *ww, w->parts()) { | |
803 orderedParts.insert(ww); | |
804 } | |
805 foreach (Work *ww, orderedParts) { | |
806 std::cout << " "; | |
807 if (ww->number() != "") { | |
808 std::cout << ww->number().toStdString() << ". "; | |
809 } | |
810 std::cout << ww->name().toStdString(); | |
811 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) { | |
812 std::cout << " [" << ww->catalogue().toStdString() << "]"; | |
813 } | |
814 if (ww->opus() != "" && ww->opus() != w->opus()) { | |
815 std::cout << " [op. " << ww->opus().toStdString() << "]"; | |
816 } | |
817 std::cout << std::endl; | |
818 } | |
819 } | |
820 | |
821 std::cout << std::endl; | |
822 } | |
823 | |
824 delete outstore; | |
825 | |
826 DEBUG << "Done" << endl; | |
827 | |
828 | |
829 } | |
830 | |
831 |