Chris@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@0
|
2
|
Chris@0
|
3 #include "Objects.h"
|
Chris@0
|
4
|
Chris@0
|
5 #include <dataquay/BasicStore.h>
|
Chris@0
|
6 #include <dataquay/RDFException.h>
|
Chris@28
|
7 #include <dataquay/objectmapper/ObjectStorer.h>
|
Chris@28
|
8 #include <dataquay/objectmapper/ObjectLoader.h>
|
Chris@0
|
9 #include <dataquay/objectmapper/ObjectBuilder.h>
|
Chris@28
|
10 #include <dataquay/objectmapper/TypeMapping.h>
|
Chris@0
|
11 #include <dataquay/objectmapper/ContainerBuilder.h>
|
Chris@0
|
12
|
Chris@0
|
13 #include "ImportClassicalComposersOrg.h"
|
Chris@0
|
14 #include "ImportClassicalDotNet.h"
|
Chris@4
|
15 #include "ImportClassicalArchives.h"
|
Chris@0
|
16 #include "ImportWikipediaComposers.h"
|
Chris@0
|
17 #include "ImportWikipediaWorks.h"
|
Chris@0
|
18 #include "ImportWikipediaWorksK.h"
|
Chris@0
|
19 #include "ImportWikipediaWorksList.h"
|
Chris@0
|
20 #include "ImportHoboken.h"
|
Chris@0
|
21
|
Chris@7
|
22 #include "TypeRegistrar.h"
|
Chris@7
|
23
|
Chris@0
|
24 #include <dataquay/Debug.h>
|
Chris@0
|
25
|
Chris@0
|
26 using namespace ClassicalData;
|
Chris@0
|
27 using namespace Dataquay;
|
Chris@0
|
28
|
Chris@0
|
29 #include <iostream>
|
Chris@0
|
30 #include <set>
|
Chris@0
|
31
|
Chris@0
|
32 typedef QMap<QString, QSet<Composer *> > ComposerMap; // lookup key -> composers; keys are Composer::reduceName() of a name, an alias, or a "birth-death" date string (see mergeComposer)
|
Chris@0
|
33
|
Chris@0
|
34 void
|
Chris@0
|
35 addMiscExpansions(Composer *c)
|
Chris@0
|
36 {
|
Chris@0
|
37 QString n = c->name();
|
Chris@0
|
38
|
Chris@0
|
39 DEBUG << "addMiscExpansions: n = " << n << endl;
|
Chris@0
|
40
|
Chris@0
|
41 // lovely hard-coded special cases go here! some of these are
|
Chris@0
|
42 // needed for works->composer assignments
|
Chris@0
|
43 if (n == "Balakirev, Milii") {
|
Chris@0
|
44 c->addAlias("Mily Balakirev");
|
Chris@0
|
45 }
|
Chris@0
|
46 if (n.startsWith("Cui, C")) {
|
Chris@0
|
47 c->addAlias(QString::fromUtf8("C\303\251sar Cui"));
|
Chris@0
|
48 }
|
Chris@0
|
49 if (n == "Handel, George Frideric") {
|
Chris@0
|
50 c->addAlias("Handel, Georg Friedrich");
|
Chris@0
|
51 c->addAlias("Handel");
|
Chris@0
|
52 }
|
Chris@1
|
53 if (n == "Prokofiev, Sergey") {
|
Chris@1
|
54 c->addAlias("Prokofieff, Sergei");
|
Chris@1
|
55 c->addAlias("Sergei Prokofieff");
|
Chris@1
|
56 }
|
Chris@1
|
57 if (n == "Rossini, Gioacchino") {
|
Chris@1
|
58 c->addAlias("Rossini, Gioachino");
|
Chris@1
|
59 c->addAlias("Gioachino Rossini");
|
Chris@1
|
60 }
|
Chris@1
|
61 if (n == "Edwards, Richard") {
|
Chris@1
|
62 c->addAlias("Edwardes, Richard");
|
Chris@1
|
63 c->addAlias("Richard Edwardes");
|
Chris@1
|
64 c->addAlias("Richard Edwards");
|
Chris@1
|
65 }
|
Chris@1
|
66 if (n == "Rimsky-Korsakov, Nikolay Andreyevich") {
|
Chris@1
|
67 c->addAlias("Nikolai Rimsky-Korsakov");
|
Chris@1
|
68 }
|
Chris@1
|
69 if (n.startsWith("Piccinni, Nico")) {
|
Chris@1
|
70 c->addAlias(n);
|
Chris@1
|
71 c->setName(QString::fromUtf8("Piccinni, Niccol\303\262"));
|
Chris@1
|
72 }
|
Chris@1
|
73 if (n == "Tchaikovsky, Pyotr Ilyich") {
|
Chris@1
|
74 c->addAlias("Tchaikovsky, Piotr Ilyitch");
|
Chris@1
|
75 }
|
Chris@1
|
76 if (n == "Wilhelm Stenhammar") {
|
Chris@1
|
77 c->addAlias("Stenhammar, Vilhelm Eugene");
|
Chris@1
|
78 c->setName("Stenhammar, Wilhelm");
|
Chris@1
|
79 c->addAlias(n);
|
Chris@1
|
80 }
|
Chris@1
|
81 if (n == "Mercadante, Saverio Rafaele") {
|
Chris@1
|
82 c->addAlias("Mercadante, Giuseppe");
|
Chris@1
|
83 }
|
Chris@1
|
84 if (n == "Johann Wenzel Anton Stamitz") {
|
Chris@1
|
85 c->addAlias(n);
|
Chris@1
|
86 c->setName("Stamitz, Johann Wenzel Anton");
|
Chris@1
|
87 c->addAlias("Stamitz, Jan Vaclav");
|
Chris@1
|
88 }
|
Chris@1
|
89 if (n == "Mario Castelnuovo-Tedesco") {
|
Chris@1
|
90 c->addAlias("Castelnuovo Tedesco, Mario");
|
Chris@1
|
91 }
|
Chris@0
|
92 if (n == "Mayr, Simon") {
|
Chris@0
|
93 c->addAlias("Mayr");
|
Chris@0
|
94 }
|
Chris@0
|
95
|
Chris@0
|
96 n.replace(", Sr.", " Sr.");
|
Chris@0
|
97 n.replace(", Jr.", " Jr.");
|
Chris@0
|
98
|
Chris@0
|
99 int comma = n.indexOf(", ");
|
Chris@0
|
100 if (comma > 0 && comma + 2 < n.length()) {
|
Chris@0
|
101
|
Chris@0
|
102 QString left = n.left(comma);
|
Chris@0
|
103 QString right = n.right(n.length() - comma - 2);
|
Chris@0
|
104
|
Chris@0
|
105 QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$");
|
Chris@0
|
106 if (jrsr.indexIn(right) >= 0) {
|
Chris@0
|
107 left = left + jrsr.cap(1);
|
Chris@0
|
108 right = right.left(right.length()-jrsr.matchedLength());
|
Chris@0
|
109 }
|
Chris@0
|
110 n = right + " " + left;
|
Chris@0
|
111 }
|
Chris@0
|
112
|
Chris@0
|
113 if (n != c->name()) c->addAlias(n);
|
Chris@0
|
114
|
Chris@0
|
115 if (n.contains("Sergey")) {
|
Chris@0
|
116 QString nn(n);
|
Chris@0
|
117 nn.replace("Sergey", "Sergei");
|
Chris@0
|
118 c->addAlias(nn);
|
Chris@1
|
119 } else if (n.contains("Sergei")) {
|
Chris@1
|
120 QString nn(n);
|
Chris@1
|
121 nn.replace("Sergei", "Sergey");
|
Chris@1
|
122 c->addAlias(nn);
|
Chris@0
|
123 }
|
Chris@0
|
124
|
Chris@0
|
125 QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive);
|
Chris@0
|
126 if (sr.indexIn(n) >= 0) {
|
Chris@0
|
127 QString nr = n;
|
Chris@0
|
128 nr.replace(sr.pos(0), sr.matchedLength(), " I");
|
Chris@0
|
129 nr.replace(" ", " ");
|
Chris@0
|
130 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
|
Chris@0
|
131 c->addAlias(nr);
|
Chris@0
|
132 }
|
Chris@0
|
133 QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive);
|
Chris@0
|
134 if (jr.indexIn(n) >= 0) {
|
Chris@0
|
135 QString nr = n;
|
Chris@0
|
136 nr.replace(jr.pos(0), jr.matchedLength(), " II");
|
Chris@0
|
137 nr.replace(" ", " ");
|
Chris@0
|
138 DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl;
|
Chris@0
|
139 c->addAlias(nr);
|
Chris@0
|
140 }
|
Chris@0
|
141 QString nr = n;
|
Chris@0
|
142 nr.replace("(I)", "I");
|
Chris@0
|
143 nr.replace("(II)", "II");
|
Chris@0
|
144 nr.replace("(III)", "III");
|
Chris@0
|
145 c->addAlias(nr);
|
Chris@0
|
146 }
|
Chris@0
|
147
|
Chris@0
|
148 bool
|
Chris@0
|
149 hasBetterName(Composer *c, Composer *other)
|
Chris@0
|
150 {
|
Chris@0
|
151 if (c->name() == other->name()) return false;
|
Chris@0
|
152
|
Chris@0
|
153 // Try to guess which of c and other is more likely to have a good
|
Chris@0
|
154 // "canonical form" of the composer's name
|
Chris@0
|
155
|
Chris@0
|
156 if (c->name().startsWith("van ")) {
|
Chris@0
|
157 return false; // wrong choice of sort for e.g. LvB; should be
|
Chris@0
|
158 // Beethoven, Ludwig van, not van Beethoven, Ludwig
|
Chris@0
|
159 }
|
Chris@0
|
160 if (other->name().startsWith("van ")) {
|
Chris@0
|
161 return true;
|
Chris@0
|
162 }
|
Chris@0
|
163
|
Chris@0
|
164 if (c->aliases().size() != other->aliases().size()) {
|
Chris@0
|
165 // a rather weak heuristic
|
Chris@0
|
166 return c->aliases().size() > other->aliases().size();
|
Chris@0
|
167 }
|
Chris@0
|
168
|
Chris@0
|
169 if (c->name().contains(',') && !other->name().contains(',')) {
|
Chris@0
|
170 // another rather weak heuristic
|
Chris@0
|
171 return true;
|
Chris@0
|
172 }
|
Chris@0
|
173
|
Chris@0
|
174 return false;
|
Chris@0
|
175 }
|
Chris@0
|
176
|
Chris@0
|
177 void mergeComposer(Composer *c, ComposerMap &composers)
|
Chris@0
|
178 {
|
Chris@0
|
179 QString name = c->name();
|
Chris@0
|
180
|
Chris@0
|
181 QSet<QString> allNames = c->aliases();
|
Chris@0
|
182 allNames.insert(name);
|
Chris@0
|
183
|
Chris@0
|
184 QString dates;
|
Chris@0
|
185 if (c->birth()) {
|
Chris@0
|
186 if (c->death()) {
|
Chris@0
|
187 dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year());
|
Chris@0
|
188 } else {
|
Chris@0
|
189 dates = QString("%1-").arg(c->birth()->year());
|
Chris@0
|
190 }
|
Chris@0
|
191 }
|
Chris@0
|
192 if (dates != "") {
|
Chris@0
|
193 allNames.insert(dates);
|
Chris@0
|
194 }
|
Chris@0
|
195
|
Chris@0
|
196 QSet<Composer *> matches;
|
Chris@0
|
197
|
Chris@0
|
198 foreach (QString candidateName, allNames) {
|
Chris@10
|
199 QString key = Composer::reduceName(candidateName);
|
Chris@0
|
200 if (composers.contains(key)) {
|
Chris@0
|
201 foreach (Composer *candidate, composers[key]) {
|
Chris@0
|
202 if (candidateName == dates) {
|
Chris@5
|
203 if (c->name() == candidate->name()) {
|
Chris@5
|
204 DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl;
|
Chris@10
|
205 } else if (!candidate->matchCatalogueName(c->name()) &&
|
Chris@10
|
206 !c->matchCatalogueName(candidate->name())) {
|
Chris@0
|
207 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
|
Chris@0
|
208 continue;
|
Chris@0
|
209 } else {
|
Chris@0
|
210 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
|
Chris@0
|
211 }
|
Chris@0
|
212 } else {
|
Chris@10
|
213 if (!c->matchDates(candidate)) {
|
Chris@0
|
214 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl;
|
Chris@0
|
215 continue;
|
Chris@0
|
216 }
|
Chris@0
|
217 }
|
Chris@0
|
218 matches.insert(candidate);
|
Chris@0
|
219 }
|
Chris@0
|
220 }
|
Chris@0
|
221 }
|
Chris@0
|
222
|
Chris@0
|
223 if (matches.empty()) {
|
Chris@0
|
224 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
|
Chris@0
|
225
|
Chris@0
|
226 if (!c->birth() && !c->death()) {
|
Chris@5
|
227 DEBUG << "Composer has no dates, laboriously searching for all names" << endl;
|
Chris@0
|
228 // laboriously look for fuzzy match across _all_ composers
|
Chris@0
|
229 for (ComposerMap::iterator i = composers.begin();
|
Chris@0
|
230 i != composers.end(); ++i) {
|
Chris@0
|
231 foreach (Composer *candidate, *i) {
|
Chris@10
|
232 if (candidate->matchCatalogueName(c->name())) {
|
Chris@0
|
233 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl;
|
Chris@0
|
234 matches.insert(candidate);
|
Chris@0
|
235 break;
|
Chris@0
|
236 }
|
Chris@0
|
237 }
|
Chris@0
|
238 if (!matches.empty()) break;
|
Chris@0
|
239 }
|
Chris@0
|
240 }
|
Chris@0
|
241
|
Chris@0
|
242 if (matches.empty()) {
|
Chris@0
|
243 foreach (QString candidateName, allNames) {
|
Chris@10
|
244 QString key = Composer::reduceName(candidateName);
|
Chris@5
|
245 composers[key].insert(c);
|
Chris@0
|
246 DEBUG << "added for alias or date " << candidateName << endl;
|
Chris@0
|
247 }
|
Chris@0
|
248 return;
|
Chris@0
|
249 }
|
Chris@0
|
250 }
|
Chris@0
|
251
|
Chris@0
|
252 if (matches.size() > 1) {
|
Chris@0
|
253 DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl;
|
Chris@0
|
254 }
|
Chris@0
|
255
|
Chris@0
|
256 Composer *other = *matches.begin();
|
Chris@0
|
257
|
Chris@0
|
258 DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl;
|
Chris@0
|
259
|
Chris@0
|
260 if (hasBetterName(c, other)) {
|
Chris@0
|
261 other->addAlias(other->name());
|
Chris@0
|
262 other->setName(c->name());
|
Chris@0
|
263 } else {
|
Chris@0
|
264 other->addAlias(c->name());
|
Chris@0
|
265 }
|
Chris@10
|
266 composers[Composer::reduceName(c->name())].insert(other);
|
Chris@0
|
267 DEBUG << "linking from alias " << c->name() << endl;
|
Chris@0
|
268
|
Chris@0
|
269 foreach (QString alias, c->aliases()) {
|
Chris@0
|
270 if (alias != other->name() &&
|
Chris@0
|
271 !other->aliases().contains(alias)) {
|
Chris@0
|
272 other->addAlias(alias);
|
Chris@10
|
273 composers[Composer::reduceName(alias)].insert(other);
|
Chris@0
|
274 DEBUG << "linking from alias " << alias << endl;
|
Chris@0
|
275 }
|
Chris@0
|
276 }
|
Chris@0
|
277
|
Chris@0
|
278 foreach (Document *d, c->pages()) {
|
Chris@0
|
279 bool found = false;
|
Chris@0
|
280 foreach (Document *dd, other->pages()) {
|
Chris@0
|
281 if (d->uri() == dd->uri()) {
|
Chris@0
|
282 found = true;
|
Chris@0
|
283 break;
|
Chris@0
|
284 }
|
Chris@0
|
285 }
|
Chris@0
|
286 if (!found) {
|
Chris@0
|
287 d->setTopic(other);
|
Chris@0
|
288 other->addPage(d);
|
Chris@0
|
289 }
|
Chris@0
|
290 }
|
Chris@0
|
291
|
Chris@0
|
292 //!!! actually the "approximate" bits of the following are bogus;
|
Chris@0
|
293 // a source reporting birth or death date as approx is probably
|
Chris@0
|
294 // more accurate than one reporting an exact date
|
Chris@0
|
295
|
Chris@0
|
296 if (c->birth()) {
|
Chris@0
|
297 if (!other->birth() || other->birth()->approximate()) {
|
Chris@0
|
298 other->setBirth(c->birth());
|
Chris@0
|
299 }
|
Chris@0
|
300 }
|
Chris@0
|
301
|
Chris@0
|
302 if (c->death()) {
|
Chris@0
|
303 if (!other->death() || other->death()->approximate()) {
|
Chris@0
|
304 other->setDeath(c->death());
|
Chris@0
|
305 }
|
Chris@0
|
306 }
|
Chris@0
|
307
|
Chris@0
|
308 if (c->gender() != "") other->setGender(c->gender());
|
Chris@4
|
309
|
Chris@4
|
310 foreach (QString s, c->nationality()) {
|
Chris@4
|
311 other->addNationality(s);
|
Chris@4
|
312 }
|
Chris@4
|
313
|
Chris@18
|
314 foreach (Uri s, c->geonameURIs()) {
|
Chris@4
|
315 other->addGeonameURI(s);
|
Chris@4
|
316 }
|
Chris@4
|
317
|
Chris@0
|
318 if (c->remarks() != "") other->setRemarks(c->remarks());
|
Chris@0
|
319 if (c->period() != "") other->setPeriod(c->period());
|
Chris@0
|
320
|
Chris@0
|
321 }
|
Chris@0
|
322
|
Chris@0
|
323 QString
|
Chris@0
|
324 asciify(QString field)
|
Chris@0
|
325 {
|
Chris@0
|
326 // accented characters etc -- add "ascii version" for dumb search purposes
|
Chris@0
|
327 QString ascii;
|
Chris@0
|
328 for (int i = 0; i < field.length(); ++i) {
|
Chris@0
|
329 QString dc = field[i].decomposition();
|
Chris@0
|
330 if (dc != "") ascii += dc[0];
|
Chris@0
|
331 else if (field[i] == QChar(0x00DF)) {
|
Chris@0
|
332 ascii += "ss";
|
Chris@0
|
333 } else {
|
Chris@0
|
334 ascii += field[i];
|
Chris@0
|
335 }
|
Chris@0
|
336 }
|
Chris@0
|
337 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
|
Chris@0
|
338 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
|
Chris@0
|
339 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
|
Chris@0
|
340 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
|
Chris@0
|
341 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
|
Chris@0
|
342 return ascii;
|
Chris@0
|
343 }
|
Chris@0
|
344
|
Chris@0
|
345 void
|
Chris@0
|
346 asciify(Composer *c)
|
Chris@0
|
347 {
|
Chris@0
|
348 QString n = c->name();
|
Chris@0
|
349 QString asc = asciify(n);
|
Chris@0
|
350 if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc);
|
Chris@0
|
351 foreach (QString alias, c->aliases()) {
|
Chris@0
|
352 asc = asciify(alias);
|
Chris@0
|
353 if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc);
|
Chris@0
|
354 }
|
Chris@0
|
355 }
|
Chris@0
|
356
|
Chris@0
|
357 void
|
Chris@0
|
358 asciify(Work *w)
|
Chris@0
|
359 {
|
Chris@0
|
360 QString n = w->name();
|
Chris@0
|
361 QString asc = asciify(n);
|
Chris@0
|
362 if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc);
|
Chris@0
|
363 foreach (QString alias, w->aliases()) {
|
Chris@0
|
364 asc = asciify(alias);
|
Chris@0
|
365 if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc);
|
Chris@0
|
366 }
|
Chris@0
|
367 }
|
Chris@0
|
368
|
Chris@0
|
369 void
|
Chris@0
|
370 assignUri(Store *s, Composer *c)
|
Chris@0
|
371 {
|
Chris@0
|
372 static QSet<QString> convSet;
|
Chris@0
|
373 QString conv = c->name();
|
Chris@0
|
374 if (!conv.contains(",")) {
|
Chris@0
|
375 QStringList sl = conv.split(" ");
|
Chris@0
|
376 if (!sl.empty()) {
|
Chris@0
|
377 sl.push_front(sl[sl.size()-1]);
|
Chris@0
|
378 sl.removeLast();
|
Chris@0
|
379 conv = sl.join(" ");
|
Chris@0
|
380 DEBUG << "assignUri: " << c->name() << " -> " << conv << endl;
|
Chris@0
|
381 }
|
Chris@0
|
382 }
|
Chris@0
|
383 conv = asciify(conv);
|
Chris@0
|
384 conv.replace(" ", "_");
|
Chris@0
|
385 conv.replace("-", "_");
|
Chris@0
|
386 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
|
Chris@0
|
387 conv = conv.toLower();
|
Chris@0
|
388 QString initial = conv;
|
Chris@1
|
389 int i = 2;
|
Chris@0
|
390 while (convSet.contains(conv)) {
|
Chris@0
|
391 conv = QString("%1__%2").arg(initial).arg(i);
|
Chris@0
|
392 i++;
|
Chris@0
|
393 }
|
Chris@0
|
394 convSet.insert(conv);
|
Chris@18
|
395 c->setProperty("uri", QVariant::fromValue(s->expand(":composer/" + conv)));
|
Chris@0
|
396 }
|
Chris@0
|
397
|
Chris@0
|
398 void
|
Chris@0
|
399 assignUri(Store *s, Work *w, Composer *c)
|
Chris@0
|
400 {
|
Chris@18
|
401 QString pfx = c->property("uri").value<Uri>().toString();
|
Chris@0
|
402 DEBUG << "pfx = " << pfx << endl;
|
Chris@2
|
403 if (!pfx.contains("composer/")) pfx = ":work/";
|
Chris@2
|
404 else {
|
Chris@2
|
405 pfx.replace("composer/", "work/");
|
Chris@2
|
406 pfx += "/";
|
Chris@2
|
407 }
|
Chris@0
|
408
|
Chris@0
|
409 static QSet<QString> convSet;
|
Chris@1
|
410
|
Chris@0
|
411 QString conv = w->catalogue();
|
Chris@0
|
412 if (conv == "") conv = w->opus();
|
Chris@0
|
413 conv = conv.replace(".", "");
|
Chris@0
|
414 bool hasOpus = (conv != "");
|
Chris@1
|
415 if (conv == "") conv = w->name().toLower();
|
Chris@0
|
416 if (w->number() != "") conv = conv + "_no" + w->number();
|
Chris@0
|
417 conv = asciify(conv);
|
Chris@0
|
418 conv.replace(" ", "_");
|
Chris@0
|
419 conv.replace("-", "_");
|
Chris@0
|
420 conv.replace(":", "_");
|
Chris@0
|
421 conv.replace(QRegExp("[^a-zA-Z0-9_-]"), "");
|
Chris@1
|
422
|
Chris@2
|
423 if (pfx != "") conv = pfx + conv;
|
Chris@1
|
424
|
Chris@0
|
425 // I think actually for works we want to merge duplicates rather than
|
Chris@0
|
426 // assign them separate URIs, _unless_ they lack a viable opus number
|
Chris@0
|
427 if (!hasOpus) {
|
Chris@0
|
428 QString initial = conv;
|
Chris@1
|
429 int i = 2;
|
Chris@0
|
430 while (convSet.contains(conv)) {
|
Chris@0
|
431 conv = QString("%1__%2").arg(initial).arg(i);
|
Chris@0
|
432 i++;
|
Chris@0
|
433 }
|
Chris@0
|
434 }
|
Chris@0
|
435 convSet.insert(conv);
|
Chris@1
|
436
|
Chris@1
|
437 w->setProperty("uri", conv);
|
Chris@0
|
438 }
|
Chris@0
|
439
|
Chris@0
|
440 void
|
Chris@0
|
441 addDbpediaResource(Store *store, QObject *o, QString s)
|
Chris@0
|
442 {
|
Chris@18
|
443 Uri u = o->property("uri").value<Uri>();
|
Chris@18
|
444 if (u == Uri()) return;
|
Chris@0
|
445 if (s.startsWith("http://en.wikipedia.org/wiki/")) {
|
Chris@0
|
446 store->add(Triple(u,
|
Chris@0
|
447 "mo:wikipedia",
|
Chris@18
|
448 Uri(s)));
|
Chris@0
|
449 s.replace("http://en.wikipedia.org/wiki/",
|
Chris@0
|
450 "http://dbpedia.org/resource/");
|
Chris@0
|
451 store->add(Triple(u,
|
Chris@0
|
452 "owl:sameAs",
|
Chris@18
|
453 Uri(s)));
|
Chris@0
|
454 }
|
Chris@0
|
455 }
|
Chris@0
|
456
|
Chris@0
|
457 int main(int argc, char **argv)
|
Chris@0
|
458 {
|
Chris@0
|
459 qRegisterMetaType<ClassicalComposersOrgImporter *>
|
Chris@0
|
460 ("ClassicalData::ClassicalComposersOrgImporter*");
|
Chris@0
|
461 qRegisterMetaType<ClassicalDotNetImporter *>
|
Chris@0
|
462 ("ClassicalData::ClassicalDotNetImporter*");
|
Chris@4
|
463 qRegisterMetaType<ClassicalArchivesImporter *>
|
Chris@4
|
464 ("ClassicalData::ClassicalArchivesImporter*");
|
Chris@0
|
465 qRegisterMetaType<WikipediaComposersImporter *>
|
Chris@0
|
466 ("ClassicalData::WikipediaComposersImporter*");
|
Chris@0
|
467 qRegisterMetaType<WikipediaWorksImporter *>
|
Chris@0
|
468 ("ClassicalData::WikipediaWorksImporter*");
|
Chris@0
|
469 qRegisterMetaType<WikipediaWorksKImporter *>
|
Chris@0
|
470 ("ClassicalData::WikipediaWorksKImporter*");
|
Chris@0
|
471 qRegisterMetaType<WikipediaWorksListImporter *>
|
Chris@0
|
472 ("ClassicalData::WikipediaWorksListImporter*");
|
Chris@0
|
473 qRegisterMetaType<HobokenImporter *>
|
Chris@0
|
474 ("ClassicalData::HobokenImporter*");
|
Chris@0
|
475
|
Chris@0
|
476 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
477 <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*");
|
Chris@0
|
478 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
479 <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*");
|
Chris@0
|
480 ObjectBuilder::getInstance()->registerClass
|
Chris@4
|
481 <ClassicalArchivesImporter>("ClassicalData::ClassicalArchivesImporter*");
|
Chris@4
|
482 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
483 <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*");
|
Chris@0
|
484 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
485 <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*");
|
Chris@0
|
486 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
487 <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*");
|
Chris@0
|
488 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
489 <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*");
|
Chris@0
|
490 ObjectBuilder::getInstance()->registerClass
|
Chris@0
|
491 <HobokenImporter>("ClassicalData::HobokenImporter*");
|
Chris@0
|
492
|
Chris@20
|
493 BasicStore *store = BasicStore::load(QUrl("file:importers.ttl"));
|
Chris@28
|
494 ObjectLoader loader(store);
|
Chris@28
|
495 QObject *parentObject = loader.loadAllObjects(new QObject());
|
Chris@0
|
496
|
Chris@0
|
497 BasicStore *outstore = new BasicStore();
|
Chris@20
|
498 outstore->setBaseUri(Uri("http://dbtune.org/classical/resource/"));
|
Chris@28
|
499 ObjectStorer storer(outstore);
|
Chris@28
|
500 TypeMapping tm;
|
Chris@0
|
501
|
Chris@23
|
502 TypeRegistrar::registerTypes();
|
Chris@28
|
503 TypeRegistrar::addMappings(outstore, &tm);
|
Chris@7
|
504
|
Chris@28
|
505 storer.setTypeMapping(tm);
|
Chris@28
|
506 storer.setPropertyStorePolicy(ObjectStorer::StoreIfChanged);
|
Chris@28
|
507 storer.setObjectStorePolicy(ObjectStorer::StoreAllObjects);
|
Chris@28
|
508 storer.setBlankNodePolicy(ObjectStorer::NoBlankNodes);
|
Chris@1
|
509
|
Chris@0
|
510 QList<Importer *> importers = parentObject->findChildren<Importer *>();
|
Chris@0
|
511 std::cerr << "have " << importers.size() << " importers" << std::endl;
|
Chris@0
|
512
|
Chris@0
|
513 ComposerMap composers;
|
Chris@0
|
514
|
Chris@0
|
515 QList<Composer *> dated;
|
Chris@0
|
516 QList<Composer *> undated;
|
Chris@0
|
517
|
Chris@0
|
518 QList<Work *> works;
|
Chris@0
|
519 QList<Composition *> compositions;
|
Chris@0
|
520 QList<QObject *> other;
|
Chris@0
|
521
|
Chris@0
|
522 foreach (Importer *importer, importers) {
|
Chris@0
|
523 QObjectList objects = importer->getImportedObjects();
|
Chris@0
|
524 foreach (QObject *o, objects) {
|
Chris@0
|
525 Composer *c;
|
Chris@0
|
526 if ((c = qobject_cast<Composer *>(o))) {
|
Chris@0
|
527 addMiscExpansions(c);
|
Chris@0
|
528 asciify(c);
|
Chris@0
|
529 if (c->birth() || c->death()) dated.push_back(c);
|
Chris@0
|
530 else undated.push_back(c);
|
Chris@0
|
531 continue;
|
Chris@0
|
532 }
|
Chris@0
|
533 Work *w;
|
Chris@0
|
534 if ((w = qobject_cast<Work *>(o))) {
|
Chris@0
|
535 asciify(w);
|
Chris@0
|
536 works.push_back(w);
|
Chris@0
|
537 continue;
|
Chris@0
|
538 }
|
Chris@0
|
539 Composition *cn;
|
Chris@0
|
540 if ((cn = qobject_cast<Composition *>(o))) {
|
Chris@0
|
541 compositions.push_back(cn);
|
Chris@0
|
542 continue;
|
Chris@0
|
543 }
|
Chris@0
|
544 }
|
Chris@0
|
545 }
|
Chris@0
|
546
|
Chris@0
|
547 // get all the dated composers merged before attempting to match
|
Chris@0
|
548 // the undated ones
|
Chris@0
|
549 foreach (Composer *c, dated) {
|
Chris@0
|
550 mergeComposer(c, composers);
|
Chris@0
|
551 }
|
Chris@0
|
552 foreach (Composer *c, undated) {
|
Chris@0
|
553 mergeComposer(c, composers);
|
Chris@0
|
554 }
|
Chris@0
|
555
|
Chris@0
|
556 QObjectList toStore;
|
Chris@0
|
557
|
Chris@0
|
558 QSet<Composer *> cset;
|
Chris@0
|
559 for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) {
|
Chris@0
|
560 foreach (Composer *c, i.value()) {
|
Chris@0
|
561 if (!cset.contains(c)) {
|
Chris@0
|
562 assignUri(outstore, c);
|
Chris@0
|
563 toStore.push_back(c);
|
Chris@0
|
564 cset.insert(c);
|
Chris@0
|
565 }
|
Chris@0
|
566 foreach (Document *d, c->pages()) {
|
Chris@0
|
567 QString s = d->uri().toString();
|
Chris@0
|
568 addDbpediaResource(outstore, c, s);
|
Chris@0
|
569 }
|
Chris@0
|
570 }
|
Chris@0
|
571 }
|
Chris@0
|
572
|
Chris@0
|
573 QSet<QString> storedUris;
|
Chris@0
|
574
|
Chris@0
|
575 foreach (Work *w, works) {
|
Chris@0
|
576 Composition *cn = w->composition();
|
Chris@0
|
577 if (!cn) continue;
|
Chris@0
|
578 if (!cn->composer()) {
|
Chris@0
|
579 QString cname = cn->composerName();
|
Chris@10
|
580 QString key = Composer::reduceName(cname);
|
Chris@0
|
581 if (cname != "") {
|
Chris@5
|
582 if (!composers.contains(key)) {
|
Chris@0
|
583 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
|
Chris@0
|
584 } else {
|
Chris@5
|
585 QSet<Composer *> cs = composers[key];
|
Chris@0
|
586 if (cs.empty()) {
|
Chris@0
|
587 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
|
Chris@0
|
588 } else if (cs.size() > 1) {
|
Chris@0
|
589 DEBUG << "Failed to assign Composition to composer: "
|
Chris@0
|
590 << cs.size() << " composers match name " << cname << endl;
|
Chris@0
|
591 } else {
|
Chris@0
|
592 cn->setComposer(*cs.begin());
|
Chris@0
|
593 }
|
Chris@0
|
594 }
|
Chris@0
|
595 } else {
|
Chris@0
|
596 DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl;
|
Chris@0
|
597 }
|
Chris@0
|
598 }
|
Chris@0
|
599
|
Chris@0
|
600 if (cn->composer()) {
|
Chris@0
|
601 assignUri(outstore, w, cn->composer());
|
Chris@0
|
602 }
|
Chris@0
|
603
|
Chris@0
|
604 foreach (Document *d, w->pages()) {
|
Chris@0
|
605 QString s = d->uri().toString();
|
Chris@0
|
606 addDbpediaResource(outstore, w, s);
|
Chris@1
|
607 if (!storedUris.contains(s)) {
|
Chris@1
|
608 toStore.push_back(d);
|
Chris@1
|
609 storedUris.insert(s);
|
Chris@1
|
610 }
|
Chris@0
|
611 }
|
Chris@0
|
612
|
Chris@18
|
613 QString u = w->property("uri").value<Uri>().toString();
|
Chris@0
|
614 if (u == "" || !storedUris.contains(u)) {
|
Chris@0
|
615 toStore.push_back(w);
|
Chris@0
|
616 if (u != "") storedUris.insert(u);
|
Chris@0
|
617 }
|
Chris@0
|
618 }
|
Chris@0
|
619
|
Chris@0
|
620 try {
|
Chris@28
|
621 storer.storeAllObjects(toStore);
|
Chris@0
|
622
|
Chris@0
|
623 } catch (RDFException e) {
|
Chris@0
|
624 std::cerr << "Caught RDF exception: " << e.what() << std::endl;
|
Chris@0
|
625 }
|
Chris@0
|
626
|
Chris@0
|
627 DEBUG << "Stored, now saving" << endl;
|
Chris@0
|
628
|
Chris@2
|
629 outstore->save("imported.ttl");
|
Chris@0
|
630
|
Chris@0
|
631 DEBUG << "Saved" << endl;
|
Chris@0
|
632
|
Chris@0
|
633
|
Chris@0
|
634 QMultiMap<QString, Composer *> cmap;
|
Chris@0
|
635 foreach (Composer *c, cset) {
|
Chris@0
|
636 QString n = c->getSortName(true);
|
Chris@0
|
637 cmap.insert(n, c);
|
Chris@0
|
638 }
|
Chris@0
|
639
|
Chris@0
|
640 std::cout << "Composers: " << cmap.size() << std::endl;
|
Chris@0
|
641
|
Chris@0
|
642 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
|
Chris@0
|
643 i != cmap.end(); ++i) {
|
Chris@0
|
644
|
Chris@0
|
645 QString n = i.key();
|
Chris@0
|
646 Composer *c = i.value();
|
Chris@0
|
647
|
Chris@0
|
648 std::cout << n.toStdString();
|
Chris@0
|
649
|
Chris@0
|
650 QString d = c->getDisplayDates();
|
Chris@0
|
651 if (d != "") std::cout << " (" << d.toStdString() << ")";
|
Chris@0
|
652 std::cout << std::endl;
|
Chris@0
|
653 }
|
Chris@0
|
654
|
Chris@0
|
655 std::cout << std::endl;
|
Chris@0
|
656
|
Chris@0
|
657 std::cout << "Works by composer:" << std::endl;
|
Chris@0
|
658
|
Chris@0
|
659 for (QMultiMap<QString, Composer *>::iterator i = cmap.begin();
|
Chris@0
|
660 i != cmap.end(); ++i) {
|
Chris@0
|
661
|
Chris@0
|
662 QString n = i.key();
|
Chris@0
|
663 Composer *c = i.value();
|
Chris@0
|
664
|
Chris@0
|
665 std::set<Work *, Work::Ordering> wmap;
|
Chris@0
|
666 foreach (Work *w, works) {
|
Chris@0
|
667 Composition *cn = w->composition();
|
Chris@0
|
668 if (!cn) continue;
|
Chris@0
|
669 if (cn->composer() != c) continue;
|
Chris@0
|
670 if (w->partOf()) continue;
|
Chris@0
|
671 wmap.insert(w);
|
Chris@0
|
672 }
|
Chris@0
|
673
|
Chris@0
|
674 if (wmap.empty()) continue;
|
Chris@0
|
675
|
Chris@0
|
676 std::cout << n.toStdString() << std::endl;
|
Chris@0
|
677
|
Chris@0
|
678 foreach (Work *w, wmap) {
|
Chris@0
|
679 std::cout << " * ";
|
Chris@0
|
680 std::cout << w->name().toStdString();
|
Chris@0
|
681 if (w->catalogue() != "") {
|
Chris@0
|
682 std::cout << " [" << w->catalogue().toStdString() << "]";
|
Chris@0
|
683 }
|
Chris@0
|
684 if (w->opus() != "") {
|
Chris@0
|
685 std::cout << " [op. " << w->opus().toStdString() << "]";
|
Chris@0
|
686 }
|
Chris@0
|
687 std::cout << std::endl;
|
Chris@0
|
688 std::set<Work *, Work::Ordering> orderedParts;
|
Chris@0
|
689 foreach (Work *ww, w->parts()) {
|
Chris@0
|
690 orderedParts.insert(ww);
|
Chris@0
|
691 }
|
Chris@0
|
692 foreach (Work *ww, orderedParts) {
|
Chris@0
|
693 std::cout << " ";
|
Chris@0
|
694 if (ww->number() != "") {
|
Chris@0
|
695 std::cout << ww->number().toStdString() << ". ";
|
Chris@0
|
696 }
|
Chris@0
|
697 std::cout << ww->name().toStdString();
|
Chris@0
|
698 if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) {
|
Chris@0
|
699 std::cout << " [" << ww->catalogue().toStdString() << "]";
|
Chris@0
|
700 }
|
Chris@0
|
701 if (ww->opus() != "" && ww->opus() != w->opus()) {
|
Chris@0
|
702 std::cout << " [op. " << ww->opus().toStdString() << "]";
|
Chris@0
|
703 }
|
Chris@0
|
704 std::cout << std::endl;
|
Chris@0
|
705 }
|
Chris@0
|
706 }
|
Chris@0
|
707
|
Chris@0
|
708 std::cout << std::endl;
|
Chris@0
|
709 }
|
Chris@0
|
710
|
Chris@0
|
711 delete outstore;
|
Chris@0
|
712
|
Chris@0
|
713 DEBUG << "Done" << endl;
|
Chris@0
|
714
|
Chris@0
|
715
|
Chris@0
|
716 }
|
Chris@0
|
717
|
Chris@0
|
718
|