Chris@0
|
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
|
Chris@0
|
2
|
Chris@0
|
3 #include "ImportHoboken.h"
|
Chris@0
|
4
|
Chris@0
|
5 #include <dataquay/Debug.h>
|
Chris@0
|
6
|
Chris@0
|
7 #include <QFile>
|
Chris@0
|
8 #include <QFileInfo>
|
Chris@0
|
9 #include <QTextStream>
|
Chris@0
|
10 #include <QRegExp>
|
Chris@0
|
11 #include <QVariant>
|
Chris@0
|
12
|
Chris@0
|
13 #include <exception>
|
Chris@0
|
14
|
Chris@0
|
15 using namespace Dataquay;
|
Chris@0
|
16
|
Chris@0
|
17 namespace ClassicalData {
|
Chris@0
|
18
|
Chris@0
|
19 void
|
Chris@0
|
20 HobokenImporter::setSource(QUrl source)
|
Chris@0
|
21 {
|
Chris@0
|
22 DEBUG << "HobokenImporter::setSource: " << source << endl;
|
Chris@0
|
23 import(source);
|
Chris@0
|
24 }
|
Chris@0
|
25
|
Chris@0
|
26 QString
|
Chris@0
|
27 hobToForm(QString hob)
|
Chris@0
|
28 {
|
Chris@0
|
29 QStringList bits = hob.split(':');
|
Chris@0
|
30 QString group = bits[0];
|
Chris@0
|
31 int num = bits[1].toInt();
|
Chris@0
|
32 if (group == "I") return "symphony";
|
Chris@0
|
33 if (group == "II" && (num <= 24 || !bits[1][0].isDigit())) return "divertimento";
|
Chris@0
|
34 if (group == "III") return "string quartet";
|
Chris@0
|
35 if (group == "IV") return "divertimento";
|
Chris@0
|
36 if (group == "V") return "string trio;trio";
|
Chris@0
|
37 if (group == "VI") return "string duo;duo;sonata";
|
Chris@0
|
38 if (group == "VII") return "concerto";
|
Chris@0
|
39 if (group == "VIII") return "march";
|
Chris@0
|
40 if (group == "IX") return "dance";
|
Chris@0
|
41 if (group == "X") return "divertimento";
|
Chris@0
|
42 if (group == "XI") return "trio";
|
Chris@0
|
43 if (group == "XII") return "duo";
|
Chris@0
|
44 if (group == "XIII") return "concerto";
|
Chris@0
|
45 if (group == "XIV") return "divertimento";
|
Chris@0
|
46 if (group == "XV") return "piano trio;trio";
|
Chris@0
|
47 if (group == "XVI") return "piano sonata;sonata";
|
Chris@0
|
48 if (group == "XVII") return "work for piano";
|
Chris@0
|
49 if (group == "XVIIa") return "work for piano";
|
Chris@0
|
50 if (group == "XVIII") return "piano concerto;concerto";
|
Chris@0
|
51 if (group == "XXII") return "mass";
|
Chris@0
|
52 if (group == "XXIIa") return "requiem";
|
Chris@0
|
53 //!!! choral works
|
Chris@0
|
54 return "";
|
Chris@0
|
55 }
|
Chris@0
|
56
|
Chris@0
|
57 void
|
Chris@0
|
58 HobokenImporter::import(QUrl source)
|
Chris@0
|
59 {
|
Chris@0
|
60 //!!! for now
|
Chris@0
|
61 QString filename = source.toLocalFile();
|
Chris@0
|
62
|
Chris@0
|
63 QFile file(filename);
|
Chris@0
|
64 if (!file.open(QFile::ReadOnly | QFile::Text)) {
|
Chris@0
|
65 throw std::exception();
|
Chris@0
|
66 }
|
Chris@0
|
67
|
Chris@0
|
68 QTextStream stream(&file);
|
Chris@0
|
69 stream.setCodec("UTF-8");
|
Chris@0
|
70
|
Chris@0
|
71 QString composerName = "Joseph Haydn";
|
Chris@0
|
72
|
Chris@0
|
73 DEBUG << "composerName = " << composerName << endl;
|
Chris@0
|
74
|
Chris@0
|
75 QMap<QString, Work *> hobMap;
|
Chris@0
|
76 QMap<int, Work *> opusMap;
|
Chris@0
|
77
|
Chris@0
|
78 while (!stream.atEnd()) {
|
Chris@0
|
79
|
Chris@0
|
80 QString line = stream.readLine();
|
Chris@0
|
81
|
Chris@0
|
82 QString hob = "";
|
Chris@0
|
83
|
Chris@0
|
84 QRegExp hobre("^([\\d][^ _]+_([A-Za-z]*)[^ ]+) ");
|
Chris@0
|
85
|
Chris@0
|
86 if (hobre.indexIn(line) >= 0) {
|
Chris@0
|
87
|
Chris@0
|
88 hob = hobre.cap(1);
|
Chris@0
|
89 Work *w = 0;
|
Chris@0
|
90 Composition *cn = 0;
|
Chris@0
|
91
|
Chris@0
|
92 if (!hobMap.contains(hob)) {
|
Chris@0
|
93 w = new Work();
|
Chris@0
|
94 QString key = hobre.cap(2);
|
Chris@0
|
95 if (key != "") {
|
Chris@0
|
96 if (key.length() > 1 && key[1] == 's') {
|
Chris@0
|
97 key = key[0] + "-flat";
|
Chris@0
|
98 }
|
Chris@0
|
99 if (key[0].isLower()) {
|
Chris@0
|
100 key[0] = key[0].toUpper();
|
Chris@0
|
101 key += " minor";
|
Chris@0
|
102 } else {
|
Chris@0
|
103 key += " major";
|
Chris@0
|
104 }
|
Chris@0
|
105 w->setKey(key);
|
Chris@0
|
106 }
|
Chris@0
|
107 cn = new Composition();
|
Chris@0
|
108 cn->setComposerName(composerName);
|
Chris@0
|
109 cn->addWork(w);
|
Chris@0
|
110 w->setComposition(cn);
|
Chris@0
|
111 hobMap[hob] = w;
|
Chris@0
|
112 } else {
|
Chris@0
|
113 w = hobMap[hob];
|
Chris@0
|
114 cn = w->composition();
|
Chris@0
|
115 }
|
Chris@0
|
116
|
Chris@0
|
117 QRegExp hobre2("^[^ ]+ # (Hob [^ ]*)");
|
Chris@0
|
118 if (hobre2.indexIn(line) >= 0) {
|
Chris@0
|
119 QString hobtext = hobre2.cap(1);
|
Chris@0
|
120 w->setCatalogue(hobtext);
|
Chris@0
|
121 QStringList forms = hobToForm(hobtext).split(";");
|
Chris@0
|
122 foreach (QString f, forms) {
|
Chris@0
|
123 if (f != "") {
|
Chris@0
|
124 w->addForm(Form::getFormByName(f));
|
Chris@0
|
125 }
|
Chris@0
|
126 }
|
Chris@0
|
127 continue;
|
Chris@0
|
128 }
|
Chris@0
|
129
|
Chris@0
|
130 QRegExp titlere("^[^ ]+ @([^ ]+) (.*)");
|
Chris@0
|
131 if (titlere.indexIn(line) >= 0) {
|
Chris@0
|
132 QString title = titlere.cap(2).trimmed();
|
Chris@0
|
133 if (titlere.cap(1) == "en") {
|
Chris@0
|
134 if (w->name() != "") {
|
Chris@0
|
135 w->addAlias(w->name());
|
Chris@0
|
136 }
|
Chris@0
|
137 w->setName(title);
|
Chris@0
|
138 } else {
|
Chris@0
|
139 if (w->name() == "") {
|
Chris@0
|
140 w->setName(title);
|
Chris@0
|
141 } else {
|
Chris@0
|
142 w->addAlias(title);
|
Chris@0
|
143 }
|
Chris@0
|
144 }
|
Chris@0
|
145 continue;
|
Chris@0
|
146 }
|
Chris@0
|
147
|
Chris@0
|
148 QRegExp httpre("^[^ ]+ (http:[^ ]*) *$");
|
Chris@0
|
149 if (httpre.indexIn(line) >= 0) {
|
Chris@0
|
150 QString url = httpre.cap(1).trimmed();
|
Chris@0
|
151 Document *d = new Document;
|
Chris@18
|
152 d->setUri(Uri(url));
|
Chris@0
|
153 d->setTopic(w);
|
Chris@0
|
154 if (url.contains("wikipedia")) d->setSiteName("Wikipedia");
|
Chris@0
|
155 else if (url.contains("klassika.info")) {
|
Chris@0
|
156 d->setSiteName("Klassika - Die deutschsprachigen Klassikseiten");
|
Chris@0
|
157 }
|
Chris@0
|
158 w->addPage(d);
|
Chris@0
|
159 continue;
|
Chris@0
|
160 }
|
Chris@0
|
161
|
Chris@0
|
162 QRegExp datere("^[^ ]+ \\[[^]]*(\\d{4})[^]]*\\]");
|
Chris@0
|
163 if (datere.indexIn(line) >= 0) {
|
Chris@0
|
164 cn->setYear(datere.cap(1).toInt());
|
Chris@0
|
165 continue;
|
Chris@0
|
166 }
|
Chris@0
|
167
|
Chris@0
|
168 QRegExp opre("^[^ ]+ -> ([^ ]+)");
|
Chris@0
|
169 if (opre.indexIn(line) >= 0) {
|
Chris@0
|
170 QString optext = opre.cap(1);
|
Chris@0
|
171 w->setOpus(optext);
|
Chris@0
|
172 if (optext.contains('/')) {
|
Chris@0
|
173 QStringList ops = optext.split('/');
|
Chris@0
|
174 int opno = ops[0].toInt();
|
Chris@0
|
175 if (opno == 0) {
|
Chris@0
|
176 DEBUG << "Failed to convert " << optext << " to op no" << endl;
|
Chris@0
|
177 } else {
|
Chris@0
|
178 if (!opusMap.contains(opno)) {
|
Chris@0
|
179 opusMap[opno] = new Work();
|
Chris@0
|
180 opusMap[opno]->setOpus(ops[0]);
|
Chris@0
|
181 opusMap[opno]->setComposition(new Composition());
|
Chris@0
|
182 opusMap[opno]->composition()->setComposerName(composerName);
|
Chris@0
|
183 }
|
Chris@0
|
184 opusMap[opno]->addPart(w);
|
Chris@0
|
185 w->setPartOf(opusMap[opno]);
|
Chris@0
|
186 w->setOpus(ops[0]);
|
Chris@0
|
187 w->setNumber(ops[1]);
|
Chris@0
|
188 }
|
Chris@0
|
189 }
|
Chris@0
|
190 continue;
|
Chris@0
|
191 }
|
Chris@0
|
192
|
Chris@0
|
193 continue;
|
Chris@0
|
194 }
|
Chris@0
|
195
|
Chris@0
|
196 QRegExp opre("^Opus ([\\d][^ ]*): (.*)");
|
Chris@0
|
197 if (opre.indexIn(line) >= 0) {
|
Chris@0
|
198 QString optext = opre.cap(1);
|
Chris@0
|
199 int opno = optext.toInt();
|
Chris@0
|
200 if (!opusMap.contains(opno)) {
|
Chris@0
|
201 opusMap[opno] = new Work();
|
Chris@0
|
202 opusMap[opno]->setOpus(optext);
|
Chris@0
|
203 opusMap[opno]->setComposition(new Composition());
|
Chris@0
|
204 opusMap[opno]->composition()->setComposerName(composerName);
|
Chris@0
|
205 }
|
Chris@0
|
206 QString title = opre.cap(2);
|
Chris@0
|
207 title.replace("<br>", " - ");
|
Chris@0
|
208 opusMap[opno]->setName(title);
|
Chris@0
|
209 continue;
|
Chris@0
|
210 }
|
Chris@0
|
211
|
Chris@0
|
212 DEBUG << "Failed to match line: " << line << endl;
|
Chris@0
|
213 }
|
Chris@0
|
214
|
Chris@0
|
215 foreach (Work *w, hobMap) m_objects.push_back(w);
|
Chris@0
|
216 foreach (Work *w, opusMap) m_objects.push_back(w);
|
Chris@0
|
217
|
Chris@0
|
218
|
Chris@0
|
219 DEBUG << "Found " << m_objects.size() << " things" << endl;
|
Chris@0
|
220 }
|
Chris@0
|
221
|
Chris@0
|
222
|
Chris@0
|
223 }
|
Chris@0
|
224
|
Chris@0
|
225
|