Mercurial > hg > classical
comparison import/Import.cpp @ 1:29ca5974905d classical-rdf
* More work on a nice tidy import; get some sensible URIs etc
author | Chris Cannam |
---|---|
date | Thu, 03 Dec 2009 15:42:10 +0000 |
parents | import/Test.cpp@e8f4c2b55fd8 |
children | ff067a1e7e3d |
comparison
equal
deleted
inserted
replaced
0:e8f4c2b55fd8 | 1:29ca5974905d |
---|---|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ | |
2 | |
3 #include "Objects.h" | |
4 | |
5 #include <dataquay/BasicStore.h> | |
6 #include <dataquay/RDFException.h> | |
7 #include <dataquay/objectmapper/ObjectMapper.h> | |
8 #include <dataquay/objectmapper/ObjectBuilder.h> | |
9 #include <dataquay/objectmapper/ContainerBuilder.h> | |
10 | |
11 #include "ImportClassicalComposersOrg.h" | |
12 #include "ImportClassicalDotNet.h" | |
13 #include "ImportWikipediaComposers.h" | |
14 #include "ImportWikipediaWorks.h" | |
15 #include "ImportWikipediaWorksK.h" | |
16 #include "ImportWikipediaWorksList.h" | |
17 #include "ImportHoboken.h" | |
18 | |
19 #include <dataquay/Debug.h> | |
20 | |
21 using namespace ClassicalData; | |
22 using namespace Dataquay; | |
23 | |
24 #include <iostream> | |
25 #include <set> | |
26 | |
27 typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers | |
28 | |
29 void | |
30 addMiscExpansions(Composer *c) | |
31 { | |
32 QString n = c->name(); | |
33 | |
34 DEBUG << "addMiscExpansions: n = " << n << endl; | |
35 | |
36 // lovely hard-coded special cases go here! some of these are | |
37 // needed for works->composer assignments | |
38 if (n == "Balakirev, Milii") { | |
39 c->addAlias("Mily Balakirev"); | |
40 } | |
41 if (n.startsWith("Cui, C")) { | |
42 c->addAlias(QString::fromUtf8("C\303\251sar Cui")); | |
43 } | |
44 if (n == "Handel, George Frideric") { | |
45 c->addAlias("Handel, Georg Friedrich"); | |
46 c->addAlias("Handel"); | |
47 } | |
48 if (n == "Prokofiev, Sergey") { | |
49 c->addAlias("Prokofieff, Sergei"); | |
50 c->addAlias("Sergei Prokofieff"); | |
51 } | |
52 if (n == "Rossini, Gioacchino") { | |
53 c->addAlias("Rossini, Gioachino"); | |
54 c->addAlias("Gioachino Rossini"); | |
55 } | |
56 if (n == "Edwards, Richard") { | |
57 c->addAlias("Edwardes, Richard"); | |
58 c->addAlias("Richard Edwardes"); | |
59 c->addAlias("Richard Edwards"); | |
60 } | |
61 if (n == "Rimsky-Korsakov, Nikolay Andreyevich") { | |
62 c->addAlias("Nikolai Rimsky-Korsakov"); | |
63 } | |
64 if (n.startsWith("Piccinni, Nico")) { | |
65 c->addAlias(n); | |
66 c->setName(QString::fromUtf8("Piccinni, Niccol\303\262")); | |
67 } | |
68 if (n == "Tchaikovsky, Pyotr Ilyich") { | |
69 c->addAlias("Tchaikovsky, Piotr Ilyitch"); | |
70 } | |
71 if (n == "Wilhelm Stenhammar") { | |
72 c->addAlias("Stenhammar, Vilhelm Eugene"); | |
73 c->setName("Stenhammar, Wilhelm"); | |
74 c->addAlias(n); | |
75 } | |
76 if (n == "Mercadante, Saverio Rafaele") { | |
77 c->addAlias("Mercadante, Giuseppe"); | |
78 } | |
79 if (n == "Johann Wenzel Anton Stamitz") { | |
80 c->addAlias(n); | |
81 c->setName("Stamitz, Johann Wenzel Anton"); | |
82 c->addAlias("Stamitz, Jan Vaclav"); | |
83 } | |
84 if (n == "Mario Castelnuovo-Tedesco") { | |
85 c->addAlias("Castelnuovo Tedesco, Mario"); | |
86 } | |
87 if (n == "Mayr, Simon") { | |
88 c->addAlias("Mayr"); | |
89 } | |
90 | |
91 n.replace(", Sr.", " Sr."); | |
92 n.replace(", Jr.", " Jr."); | |
93 | |
94 int comma = n.indexOf(", "); | |
95 if (comma > 0 && comma + 2 < n.length()) { | |
96 | |
97 QString left = n.left(comma); | |
98 QString right = n.right(n.length() - comma - 2); | |
99 | |
100 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$"); | |
101 if (jrsr.indexIn(right) >= 0) { | |
102 left = left + jrsr.cap(1); | |
103 right = right.left(right.length()-jrsr.matchedLength()); | |
104 } | |
105 n = right + " " + left; | |
106 } | |
107 | |
108 if (n != c->name()) c->addAlias(n); | |
109 | |
110 if (n.contains("Sergey")) { | |
111 QString nn(n); | |
112 nn.replace("Sergey", "Sergei"); | |
113 c->addAlias(nn); | |
114 } else if (n.contains("Sergei")) { | |
115 QString nn(n); | |
116 nn.replace("Sergei", "Sergey"); | |
117 c->addAlias(nn); | |
118 } | |
119 | |
120 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive); | |
121 if (sr.indexIn(n) >= 0) { | |
122 QString nr = n; | |
123 nr.replace(sr.pos(0), sr.matchedLength(), " I"); | |
124 nr.replace(" ", " "); | |
125 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; | |
126 c->addAlias(nr); | |
127 } | |
128 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive); | |
129 if (jr.indexIn(n) >= 0) { | |
130 QString nr = n; | |
131 nr.replace(jr.pos(0), jr.matchedLength(), " II"); | |
132 nr.replace(" ", " "); | |
133 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; | |
134 c->addAlias(nr); | |
135 } | |
136 QString nr = n; | |
137 nr.replace("(I)", "I"); | |
138 nr.replace("(II)", "II"); | |
139 nr.replace("(III)", "III"); | |
140 c->addAlias(nr); | |
141 } | |
142 | |
143 bool namesFuzzyMatch(QString an, Composer *b) | |
144 { | |
145 // ew! | |
146 | |
147 QString bn = b->name(); | |
148 if (bn == an) return true; | |
149 if (b->aliases().contains(an)) return true; | |
150 int aSurnameIndex = 0, bSurnameIndex = 0; | |
151 if (an.contains(",")) { | |
152 an.replace(",", ""); | |
153 } else { | |
154 aSurnameIndex = -1; | |
155 } | |
156 if (bn.contains(",")) { | |
157 bn.replace(",", ""); | |
158 } else { | |
159 bSurnameIndex = -1; | |
160 } | |
161 QStringList nl = an.split(QRegExp("[ -]")); | |
162 QStringList bnl = bn.split(QRegExp("[ -]")); | |
163 int matchCount = 0; | |
164 QString surnameMatch = ""; | |
165 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; | |
166 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; | |
167 if (nl[aSurnameIndex][0].isUpper() && | |
168 nl[aSurnameIndex] != "Della" && | |
169 nl[aSurnameIndex] == bnl[bSurnameIndex]) { | |
170 surnameMatch = nl[aSurnameIndex]; | |
171 } | |
172 foreach (QString elt, nl) { | |
173 if (!elt[0].isUpper() || elt == "Della") continue; | |
174 if (bnl.contains(elt)) { | |
175 ++matchCount; | |
176 continue; | |
177 } | |
178 } | |
179 if (matchCount > 1 && surnameMatch != "") { | |
180 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; | |
181 return true; | |
182 } | |
183 return false; | |
184 } | |
185 | |
186 bool | |
187 hasBetterName(Composer *c, Composer *other) | |
188 { | |
189 if (c->name() == other->name()) return false; | |
190 | |
191 // Try to guess which of c and other is more likely to have a good | |
192 // "canonical form" of the composer's name | |
193 | |
194 if (c->name().startsWith("van ")) { | |
195 return false; // wrong choice of sort for e.g. LvB; should be | |
196 // Beethoven, Ludwig van, not van Beethoven, Ludwig | |
197 } | |
198 if (other->name().startsWith("van ")) { | |
199 return true; | |
200 } | |
201 | |
202 if (c->aliases().size() != other->aliases().size()) { | |
203 // a rather weak heuristic | |
204 return c->aliases().size() > other->aliases().size(); | |
205 } | |
206 | |
207 if (c->name().contains(',') && !other->name().contains(',')) { | |
208 // another rather weak heuristic | |
209 return true; | |
210 } | |
211 | |
212 return false; | |
213 } | |
214 | |
215 void mergeComposer(Composer *c, ComposerMap &composers) | |
216 { | |
217 QString name = c->name(); | |
218 | |
219 QSet<QString> allNames = c->aliases(); | |
220 allNames.insert(name); | |
221 | |
222 QString dates; | |
223 if (c->birth()) { | |
224 if (c->death()) { | |
225 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year()); | |
226 } else { | |
227 dates = QString("%1-").arg(c->birth()->year()); | |
228 } | |
229 } | |
230 if (dates != "") { | |
231 allNames.insert(dates); | |
232 } | |
233 | |
234 QSet<Composer *> matches; | |
235 | |
236 foreach (QString candidateName, allNames) { | |
237 QString key = candidateName.toLower(); | |
238 if (composers.contains(key)) { | |
239 foreach (Composer *candidate, composers[key]) { | |
240 if (candidateName == dates) { | |
241 if (!namesFuzzyMatch(c->name(), candidate) && | |
242 !namesFuzzyMatch(candidate->name(), c)) { | |
243 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; | |
244 continue; | |
245 } else { | |
246 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; | |
247 } | |
248 } else { | |
249 if (!c->datesMatch(candidate)) { | |
250 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; | |
251 continue; | |
252 } | |
253 } | |
254 matches.insert(candidate); | |
255 } | |
256 } | |
257 } | |
258 | |
259 if (matches.empty()) { | |
260 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; | |
261 | |
262 if (!c->birth() && !c->death()) { | |
263 // laboriously look for fuzzy match across _all_ composers | |
264 for (ComposerMap::iterator i = composers.begin(); | |
265 i != composers.end(); ++i) { | |
266 foreach (Composer *candidate, *i) { | |
267 if (namesFuzzyMatch(c->name(), candidate)) { | |
268 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; | |
269 matches.insert(candidate); | |
270 break; | |
271 } | |
272 } | |
273 if (!matches.empty()) break; | |
274 } | |
275 } | |
276 | |
277 if (matches.empty()) { | |
278 foreach (QString candidateName, allNames) { | |
279 composers[candidateName.toLower()].insert(c); | |
280 DEBUG << "added for alias or date " << candidateName << endl; | |
281 } | |
282 return; | |
283 } | |
284 } | |
285 | |
286 if (matches.size() > 1) { | |
287 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl; | |
288 } | |
289 | |
290 Composer *other = *matches.begin(); | |
291 | |
292 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl; | |
293 | |
294 if (hasBetterName(c, other)) { | |
295 other->addAlias(other->name()); | |
296 other->setName(c->name()); | |
297 } else { | |
298 other->addAlias(c->name()); | |
299 } | |
300 composers[c->name().toLower()].insert(other); | |
301 DEBUG << "linking from alias " << c->name() << endl; | |
302 | |
303 foreach (QString alias, c->aliases()) { | |
304 if (alias != other->name() && | |
305 !other->aliases().contains(alias)) { | |
306 other->addAlias(alias); | |
307 composers[alias.toLower()].insert(other); | |
308 DEBUG << "linking from alias " << alias << endl; | |
309 } | |
310 } | |
311 | |
312 foreach (Document *d, c->pages()) { | |
313 bool found = false; | |
314 foreach (Document *dd, other->pages()) { | |
315 if (d->uri() == dd->uri()) { | |
316 found = true; | |
317 break; | |
318 } | |
319 } | |
320 if (!found) { | |
321 d->setTopic(other); | |
322 other->addPage(d); | |
323 } | |
324 } | |
325 | |
326 //!!! actually the "approximate" bits of the following are bogus; | |
327 // a source reporting birth or death date as approx is probably | |
328 // more accurate than one reporting an exact date | |
329 | |
330 if (c->birth()) { | |
331 if (!other->birth() || other->birth()->approximate()) { | |
332 other->setBirth(c->birth()); | |
333 } | |
334 } | |
335 | |
336 if (c->death()) { | |
337 if (!other->death() || other->death()->approximate()) { | |
338 other->setDeath(c->death()); | |
339 } | |
340 } | |
341 | |
342 if (c->gender() != "") other->setGender(c->gender()); | |
343 if (c->nationality() != "") other->setNationality(c->nationality()); | |
344 if (c->remarks() != "") other->setRemarks(c->remarks()); | |
345 if (c->period() != "") other->setPeriod(c->period()); | |
346 | |
347 } | |
348 | |
349 QString | |
350 asciify(QString field) | |
351 { | |
352 // accented characters etc -- add "ascii version" for dumb search purposes | |
353 QString ascii; | |
354 for (int i = 0; i < field.length(); ++i) { | |
355 QString dc = field[i].decomposition(); | |
356 if (dc != "") ascii += dc[0]; | |
357 else if (field[i] == QChar(0x00DF)) { | |
358 ascii += "ss"; | |
359 } else { | |
360 ascii += field[i]; | |
361 } | |
362 } | |
363 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe | |
364 ascii.replace(QString::fromUtf8("\342\200\222"), "-"); | |
365 ascii.replace(QString::fromUtf8("\342\200\223"), "-"); | |
366 ascii.replace(QString::fromUtf8("\342\200\224"), "-"); | |
367 ascii.replace(QString::fromUtf8("\342\200\225"), "-"); | |
368 return ascii; | |
369 } | |
370 | |
371 void | |
372 asciify(Composer *c) | |
373 { | |
374 QString n = c->name(); | |
375 QString asc = asciify(n); | |
376 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc); | |
377 foreach (QString alias, c->aliases()) { | |
378 asc = asciify(alias); | |
379 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc); | |
380 } | |
381 } | |
382 | |
383 void | |
384 asciify(Work *w) | |
385 { | |
386 QString n = w->name(); | |
387 QString asc = asciify(n); | |
388 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc); | |
389 foreach (QString alias, w->aliases()) { | |
390 asc = asciify(alias); | |
391 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc); | |
392 } | |
393 } | |
394 | |
395 void | |
396 assignUri(Store *s, Composer *c) | |
397 { | |
398 static QSet<QString> convSet; | |
399 QString conv = c->name(); | |
400 if (!conv.contains(",")) { | |
401 QStringList sl = conv.split(" "); | |
402 if (!sl.empty()) { | |
403 sl.push_front(sl[sl.size()-1]); | |
404 sl.removeLast(); | |
405 conv = sl.join(" "); | |
406 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl; | |
407 } | |
408 } | |
409 conv = asciify(conv); | |
410 conv.replace(" ", "_"); | |
411 conv.replace("-", "_"); | |
412 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); | |
413 conv = conv.toLower(); | |
414 QString initial = conv; | |
415 int i = 2; | |
416 while (convSet.contains(conv)) { | |
417 conv = QString("%1__%2").arg(initial).arg(i); | |
418 i++; | |
419 } | |
420 convSet.insert(conv); | |
421 c->setProperty("uri", s->expand(":composer/" + conv)); | |
422 } | |
423 | |
424 void | |
425 assignUri(Store *s, Work *w, Composer *c) | |
426 { | |
427 QString pfx = c->property("uri").toUrl().toString(); | |
428 DEBUG << "pfx = " << pfx << endl; | |
429 if (!pfx.contains("composer/")) pfx = ""; | |
430 | |
431 static QSet<QString> convSet; | |
432 | |
433 QString conv = w->catalogue(); | |
434 if (conv == "") conv = w->opus(); | |
435 conv = conv.replace(".", ""); | |
436 bool hasOpus = (conv != ""); | |
437 if (conv == "") conv = w->name().toLower(); | |
438 if (w->number() != "") conv = conv + "_no" + w->number(); | |
439 conv = asciify(conv); | |
440 conv.replace(" ", "_"); | |
441 conv.replace("-", "_"); | |
442 conv.replace(":", "_"); | |
443 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); | |
444 | |
445 if (pfx != "") conv = pfx + "/work/" + conv; | |
446 | |
447 // I think actually for works we want to merge duplicates rather than | |
448 // assign them separate URIs, _unless_ they lack a viable opus number | |
449 if (!hasOpus) { | |
450 QString initial = conv; | |
451 int i = 2; | |
452 while (convSet.contains(conv)) { | |
453 conv = QString("%1__%2").arg(initial).arg(i); | |
454 i++; | |
455 } | |
456 } | |
457 convSet.insert(conv); | |
458 | |
459 w->setProperty("uri", conv); | |
460 } | |
461 | |
462 void | |
463 addDbpediaResource(Store *store, QObject *o, QString s) | |
464 { | |
465 QUrl u = o->property("uri").toUrl(); | |
466 if (u == QUrl()) return; | |
467 if (s.startsWith("http://en.wikipedia.org/wiki/")) { | |
468 store->add(Triple(u, | |
469 "mo:wikipedia", | |
470 QUrl(s))); | |
471 s.replace("http://en.wikipedia.org/wiki/", | |
472 "http://dbpedia.org/resource/"); | |
473 store->add(Triple(u, | |
474 "owl:sameAs", | |
475 QUrl(s))); | |
476 } | |
477 } | |
478 | |
479 int main(int argc, char **argv) | |
480 { | |
481 qRegisterMetaType<HistoricalEvent *> | |
482 ("ClassicalData::HistoricalEvent*"); | |
483 qRegisterMetaType<Birth *> | |
484 ("ClassicalData::Birth*"); | |
485 qRegisterMetaType<Death *> | |
486 ("ClassicalData::Death*"); | |
487 qRegisterMetaType<Composition *> | |
488 ("ClassicalData::Composition*"); | |
489 qRegisterMetaType<Work *> | |
490 ("ClassicalData::Work*"); | |
491 qRegisterMetaType<Movement *> | |
492 ("ClassicalData::Movement*"); | |
493 qRegisterMetaType<Composer *> | |
494 ("ClassicalData::Composer*"); | |
495 qRegisterMetaType<Document *> | |
496 ("ClassicalData::Document*"); | |
497 qRegisterMetaType<Form *> | |
498 ("ClassicalData::Form*"); | |
499 qRegisterMetaType<QSet<Work *> > | |
500 ("QSet<ClassicalData::Work*>"); | |
501 qRegisterMetaType<QSet<Movement *> > | |
502 ("QSet<ClassicalData::Movement*>"); | |
503 qRegisterMetaType<QSet<Document *> > | |
504 ("QSet<ClassicalData::Document*>"); | |
505 qRegisterMetaType<QSet<Form *> > | |
506 ("QSet<ClassicalData::Form*>"); | |
507 qRegisterMetaType<QSet<QString> > | |
508 ("QSet<QString>"); | |
509 | |
510 qRegisterMetaType<ClassicalComposersOrgImporter *> | |
511 ("ClassicalData::ClassicalComposersOrgImporter*"); | |
512 qRegisterMetaType<ClassicalDotNetImporter *> | |
513 ("ClassicalData::ClassicalDotNetImporter*"); | |
514 qRegisterMetaType<WikipediaComposersImporter *> | |
515 ("ClassicalData::WikipediaComposersImporter*"); | |
516 qRegisterMetaType<WikipediaWorksImporter *> | |
517 ("ClassicalData::WikipediaWorksImporter*"); | |
518 qRegisterMetaType<WikipediaWorksKImporter *> | |
519 ("ClassicalData::WikipediaWorksKImporter*"); | |
520 qRegisterMetaType<WikipediaWorksListImporter *> | |
521 ("ClassicalData::WikipediaWorksListImporter*"); | |
522 qRegisterMetaType<HobokenImporter *> | |
523 ("ClassicalData::HobokenImporter*"); | |
524 | |
525 ObjectBuilder::getInstance()->registerClass | |
526 <HistoricalEvent>("ClassicalData::HistoricalEvent*"); | |
527 ObjectBuilder::getInstance()->registerClass | |
528 <Birth>("ClassicalData::Birth*"); | |
529 ObjectBuilder::getInstance()->registerClass | |
530 <Death>("ClassicalData::Death*"); | |
531 ObjectBuilder::getInstance()->registerClass | |
532 <Composition>("ClassicalData::Composition*"); | |
533 ObjectBuilder::getInstance()->registerClass | |
534 <Work, QObject>("ClassicalData::Work*"); | |
535 ObjectBuilder::getInstance()->registerClass | |
536 <Movement, QObject>("ClassicalData::Movement*"); | |
537 ObjectBuilder::getInstance()->registerClass | |
538 <Composer, QObject>("ClassicalData::Composer*"); | |
539 ObjectBuilder::getInstance()->registerClass | |
540 <Document, QObject>("ClassicalData::Document*"); | |
541 ObjectBuilder::getInstance()->registerClass | |
542 <Form, QObject>("ClassicalData::Form*"); | |
543 | |
544 ObjectBuilder::getInstance()->registerClass | |
545 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*"); | |
546 ObjectBuilder::getInstance()->registerClass | |
547 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*"); | |
548 ObjectBuilder::getInstance()->registerClass | |
549 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*"); | |
550 ObjectBuilder::getInstance()->registerClass | |
551 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*"); | |
552 ObjectBuilder::getInstance()->registerClass | |
553 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*"); | |
554 ObjectBuilder::getInstance()->registerClass | |
555 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*"); | |
556 ObjectBuilder::getInstance()->registerClass | |
557 <HobokenImporter>("ClassicalData::HobokenImporter*"); | |
558 | |
559 ContainerBuilder::getInstance()->registerContainer | |
560 <QString, QSet<QString> > | |
561 ("QString", "QSet<QString>", ContainerBuilder::SetKind); | |
562 | |
563 ContainerBuilder::getInstance()->registerContainer | |
564 <Work*, QSet<Work*> > | |
565 ("ClassicalData::Work*", "QSet<ClassicalData::Work*>", | |
566 ContainerBuilder::SetKind); | |
567 | |
568 ContainerBuilder::getInstance()->registerContainer | |
569 <Movement*, QSet<Movement*> > | |
570 ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>", | |
571 ContainerBuilder::SetKind); | |
572 | |
573 ContainerBuilder::getInstance()->registerContainer | |
574 <Document*, QSet<Document*> > | |
575 ("ClassicalData::Document*", "QSet<ClassicalData::Document*>", | |
576 ContainerBuilder::SetKind); | |
577 | |
578 ContainerBuilder::getInstance()->registerContainer | |
579 <Form*, QSet<Form*> > | |
580 ("ClassicalData::Form*", "QSet<ClassicalData::Form*>", | |
581 ContainerBuilder::SetKind); | |
582 | |
583 BasicStore *store = BasicStore::load("file:importers.ttl"); | |
584 ObjectMapper mapper(store); | |
585 QObject *parentObject = mapper.loadAllObjects(new QObject()); | |
586 | |
587 BasicStore *outstore = new BasicStore(); | |
588 outstore->setBaseUri("http://dbtune.org/classical/resource/"); | |
589 ObjectMapper outmapper(outstore); | |
590 | |
591 outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged); | |
592 | |
593 outmapper.setObjectTypePrefix("http://dbtune.org/classical/resource/"); | |
594 outmapper.setPropertyPrefix("http://dbtune.org/classical/resource/vocab/"); | |
595 outmapper.setRelationshipPrefix("http://dbtune.org/classical/resource/vocab/relationship/"); | |
596 | |
597 outstore->addPrefix("type", outmapper.getObjectTypePrefix()); | |
598 outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "type/"); | |
599 outstore->addPrefix("property", outmapper.getPropertyPrefix()); | |
600 outstore->addPrefix("rel", outmapper.getRelationshipPrefix()); | |
601 | |
602 outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/"); | |
603 outstore->addPrefix("mo", "http://purl.org/ontology/mo/"); | |
604 outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/"); | |
605 outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/"); | |
606 outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#"); | |
607 outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); | |
608 outstore->addPrefix("db", "http://dbtune.org/musicbrainz/resource/"); | |
609 outstore->addPrefix("dbv", "http://dbtune.org/musicbrainz/resource/vocab/"); | |
610 | |
611 outmapper.addTypeMapping("ClassicalData::Composer", "classical:Composer"); | |
612 outmapper.addPropertyMapping("ClassicalData::Composer", "pages", "foaf:page"); | |
613 outmapper.addPropertyMapping("ClassicalData::Composer", "name", "foaf:name"); | |
614 outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", "dbv:alias"); | |
615 outmapper.addPropertyMapping("ClassicalData::Composer", "birth", "property:birth"); | |
616 outmapper.addPropertyMapping("ClassicalData::Composer", "death", "property:death"); | |
617 | |
618 outmapper.addTypeMapping("ClassicalData::Birth", "bio:Birth"); | |
619 outmapper.addTypeMapping("ClassicalData::Death", "bio:Death"); | |
620 outmapper.addPropertyMapping("ClassicalData::Birth", "year", "bio:date"); | |
621 outmapper.addPropertyMapping("ClassicalData::Death", "year", "bio:date"); | |
622 outmapper.addPropertyMapping("ClassicalData::Birth", "place", "bio:place"); | |
623 outmapper.addPropertyMapping("ClassicalData::Death", "place", "bio:place"); | |
624 | |
625 outmapper.addTypeMapping("ClassicalData::Document", "foaf:Document"); | |
626 outmapper.addPropertyMapping("ClassicalData::Document", "topic", "foaf:primaryTopic"); | |
627 | |
628 outmapper.addTypeMapping("ClassicalData::Work", "mo:MusicalWork"); | |
629 | |
630 outmapper.addPropertyMapping("ClassicalData::Work", "composition", "mo:composed_in"); | |
631 outmapper.addPropertyMapping("ClassicalData::Work", "opus", "mo:opus"); | |
632 outmapper.addPropertyMapping("ClassicalData::Work", "catalogue", "mo:catalogue"); | |
633 outmapper.addPropertyMapping("ClassicalData::Work", "number", "mo:number"); | |
634 outmapper.addPropertyMapping("ClassicalData::Work", "partOf", "dc:isPartOf"); | |
635 outmapper.addPropertyMapping("ClassicalData::Work", "parts", "dc:hasPart"); | |
636 outmapper.addPropertyMapping("ClassicalData::Work", "pages", "foaf:page"); | |
637 outmapper.addPropertyMapping("ClassicalData::Work", "forms", "property:form"); | |
638 outmapper.addPropertyMapping("ClassicalData::Work", "key", "mo:key"); | |
639 outmapper.addPropertyMapping("ClassicalData::Work", "aliases", "dbv:alias"); | |
640 outmapper.addPropertyMapping("ClassicalData::Work", "name", "dc:title"); | |
641 | |
642 outmapper.addTypeMapping("ClassicalData::Composition", "mo:Composition"); | |
643 outmapper.addPropertyMapping("ClassicalData::Composition", "composer", "mo:composer"); | |
644 outmapper.addPropertyMapping("ClassicalData::Composition", "works", "mo:produced_work"); | |
645 | |
646 outstore->add(Triple("classical:Composer", "a", outstore->expand("owl:Class"))); | |
647 outstore->add(Triple("classical:Composer", "rdfs:subClassOf", outstore->expand("mo:MusicArtist"))); | |
648 | |
649 outstore->add(Triple("property:birth", "a", outstore->expand("owl:ObjectProperty"))); | |
650 outstore->add(Triple("property:birth", "rdfs:subPropertyOf", outstore->expand("bio:event"))); | |
651 | |
652 outstore->add(Triple("property:death", "a", outstore->expand("owl:ObjectProperty"))); | |
653 outstore->add(Triple("property:death", "rdfs:subPropertyOf", outstore->expand("bio:event"))); | |
654 | |
655 QList<Importer *> importers = parentObject->findChildren<Importer *>(); | |
656 std::cerr << "have " << importers.size() << " importers" << std::endl; | |
657 | |
658 ComposerMap composers; | |
659 | |
660 QList<Composer *> dated; | |
661 QList<Composer *> undated; | |
662 | |
663 QList<Work *> works; | |
664 QList<Composition *> compositions; | |
665 QList<QObject *> other; | |
666 | |
667 foreach (Importer *importer, importers) { | |
668 QObjectList objects = importer->getImportedObjects(); | |
669 foreach (QObject *o, objects) { | |
670 Composer *c; | |
671 if ((c = qobject_cast<Composer *>(o))) { | |
672 addMiscExpansions(c); | |
673 asciify(c); | |
674 if (c->birth() || c->death()) dated.push_back(c); | |
675 else undated.push_back(c); | |
676 continue; | |
677 } | |
678 Work *w; | |
679 if ((w = qobject_cast<Work *>(o))) { | |
680 asciify(w); | |
681 works.push_back(w); | |
682 continue; | |
683 } | |
684 Composition *cn; | |
685 if ((cn = qobject_cast<Composition *>(o))) { | |
686 compositions.push_back(cn); | |
687 continue; | |
688 } | |
689 } | |
690 } | |
691 | |
692 // get all the dated composers merged before attempting to match | |
693 // the undated ones | |
694 foreach (Composer *c, dated) { | |
695 mergeComposer(c, composers); | |
696 } | |
697 foreach (Composer *c, undated) { | |
698 mergeComposer(c, composers); | |
699 } | |
700 | |
701 QObjectList toStore; | |
702 | |
703 QSet<Composer *> cset; | |
704 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) { | |
705 foreach (Composer *c, i.value()) { | |
706 if (!cset.contains(c)) { | |
707 assignUri(outstore, c); | |
708 toStore.push_back(c); | |
709 cset.insert(c); | |
710 } | |
711 foreach (Document *d, c->pages()) { | |
712 QString s = d->uri().toString(); | |
713 addDbpediaResource(outstore, c, s); | |
714 } | |
715 } | |
716 } | |
717 | |
718 QSet<QString> storedUris; | |
719 | |
720 foreach (Work *w, works) { | |
721 Composition *cn = w->composition(); | |
722 if (!cn) continue; | |
723 if (!cn->composer()) { | |
724 QString cname = cn->composerName(); | |
725 if (cname != "") { | |
726 if (!composers.contains(cname.toLower())) { | |
727 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | |
728 } else { | |
729 QSet<Composer *> cs = composers[cname.toLower()]; | |
730 if (cs.empty()) { | |
731 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | |
732 } else if (cs.size() > 1) { | |
733 DEBUG << "Failed to assign Composition to composer: " | |
734 << cs.size() << " composers match name " << cname << endl; | |
735 } else { | |
736 cn->setComposer(*cs.begin()); | |
737 } | |
738 } | |
739 } else { | |
740 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl; | |
741 } | |
742 } | |
743 | |
744 if (cn->composer()) { | |
745 assignUri(outstore, w, cn->composer()); | |
746 } | |
747 | |
748 foreach (Document *d, w->pages()) { | |
749 QString s = d->uri().toString(); | |
750 addDbpediaResource(outstore, w, s); | |
751 if (!storedUris.contains(s)) { | |
752 toStore.push_back(d); | |
753 storedUris.insert(s); | |
754 } | |
755 } | |
756 | |
757 QString u = w->property("uri").toUrl().toString(); | |
758 if (u == "" || !storedUris.contains(u)) { | |
759 toStore.push_back(w); | |
760 if (u != "") storedUris.insert(u); | |
761 } | |
762 } | |
763 | |
764 try { | |
765 outmapper.storeAllObjects(toStore); | |
766 | |
767 } catch (RDFException e) { | |
768 std::cerr << "Caught RDF exception: " << e.what() << std::endl; | |
769 } | |
770 | |
771 DEBUG << "Stored, now saving" << endl; | |
772 | |
773 outstore->save("test-out.ttl"); | |
774 | |
775 DEBUG << "Saved" << endl; | |
776 | |
777 | |
778 QMultiMap<QString, Composer *> cmap; | |
779 foreach (Composer *c, cset) { | |
780 QString n = c->getSortName(true); | |
781 cmap.insert(n, c); | |
782 } | |
783 | |
784 std::cout << "Composers: " << cmap.size() << std::endl; | |
785 | |
786 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); | |
787 i != cmap.end(); ++i) { | |
788 | |
789 QString n = i.key(); | |
790 Composer *c = i.value(); | |
791 | |
792 std::cout << n.toStdString(); | |
793 | |
794 QString d = c->getDisplayDates(); | |
795 if (d != "") std::cout << " (" << d.toStdString() << ")"; | |
796 std::cout << std::endl; | |
797 } | |
798 | |
799 std::cout << std::endl; | |
800 | |
801 std::cout << "Works by composer:" << std::endl; | |
802 | |
803 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); | |
804 i != cmap.end(); ++i) { | |
805 | |
806 QString n = i.key(); | |
807 Composer *c = i.value(); | |
808 | |
809 std::set<Work *, Work::Ordering> wmap; | |
810 foreach (Work *w, works) { | |
811 Composition *cn = w->composition(); | |
812 if (!cn) continue; | |
813 if (cn->composer() != c) continue; | |
814 if (w->partOf()) continue; | |
815 wmap.insert(w); | |
816 } | |
817 | |
818 if (wmap.empty()) continue; | |
819 | |
820 std::cout << n.toStdString() << std::endl; | |
821 | |
822 foreach (Work *w, wmap) { | |
823 std::cout << " * "; | |
824 std::cout << w->name().toStdString(); | |
825 if (w->catalogue() != "") { | |
826 std::cout << " [" << w->catalogue().toStdString() << "]"; | |
827 } | |
828 if (w->opus() != "") { | |
829 std::cout << " [op. " << w->opus().toStdString() << "]"; | |
830 } | |
831 std::cout << std::endl; | |
832 std::set<Work *, Work::Ordering> orderedParts; | |
833 foreach (Work *ww, w->parts()) { | |
834 orderedParts.insert(ww); | |
835 } | |
836 foreach (Work *ww, orderedParts) { | |
837 std::cout << " "; | |
838 if (ww->number() != "") { | |
839 std::cout << ww->number().toStdString() << ". "; | |
840 } | |
841 std::cout << ww->name().toStdString(); | |
842 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) { | |
843 std::cout << " [" << ww->catalogue().toStdString() << "]"; | |
844 } | |
845 if (ww->opus() != "" && ww->opus() != w->opus()) { | |
846 std::cout << " [op. " << ww->opus().toStdString() << "]"; | |
847 } | |
848 std::cout << std::endl; | |
849 } | |
850 } | |
851 | |
852 std::cout << std::endl; | |
853 } | |
854 | |
855 delete outstore; | |
856 | |
857 DEBUG << "Done" << endl; | |
858 | |
859 | |
860 } | |
861 | |
862 |