annotate import/ImportHoboken.cpp @ 53:bcea875d8d2f tip

More build fixes
author Chris Cannam
date Thu, 16 Oct 2014 19:03:51 +0100
parents c8ef23d3888c
children
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 #include "ImportHoboken.h"
Chris@0 4
Chris@0 5 #include <dataquay/Debug.h>
Chris@0 6
Chris@0 7 #include <QFile>
Chris@0 8 #include <QFileInfo>
Chris@0 9 #include <QTextStream>
Chris@0 10 #include <QRegExp>
Chris@0 11 #include <QVariant>
Chris@0 12
Chris@0 13 #include <exception>
Chris@0 14
Chris@0 15 using namespace Dataquay;
Chris@0 16
Chris@0 17 namespace ClassicalData {
Chris@0 18
Chris@0 19 void
Chris@0 20 HobokenImporter::setSource(QUrl source)
Chris@0 21 {
Chris@0 22 DEBUG << "HobokenImporter::setSource: " << source << endl;
Chris@0 23 import(source);
Chris@0 24 }
Chris@0 25
Chris@0 26 QString
Chris@0 27 hobToForm(QString hob)
Chris@0 28 {
Chris@0 29 QStringList bits = hob.split(':');
Chris@0 30 QString group = bits[0];
Chris@0 31 int num = bits[1].toInt();
Chris@0 32 if (group == "I") return "symphony";
Chris@0 33 if (group == "II" && (num <= 24 || !bits[1][0].isDigit())) return "divertimento";
Chris@0 34 if (group == "III") return "string quartet";
Chris@0 35 if (group == "IV") return "divertimento";
Chris@0 36 if (group == "V") return "string trio;trio";
Chris@0 37 if (group == "VI") return "string duo;duo;sonata";
Chris@0 38 if (group == "VII") return "concerto";
Chris@0 39 if (group == "VIII") return "march";
Chris@0 40 if (group == "IX") return "dance";
Chris@0 41 if (group == "X") return "divertimento";
Chris@0 42 if (group == "XI") return "trio";
Chris@0 43 if (group == "XII") return "duo";
Chris@0 44 if (group == "XIII") return "concerto";
Chris@0 45 if (group == "XIV") return "divertimento";
Chris@0 46 if (group == "XV") return "piano trio;trio";
Chris@0 47 if (group == "XVI") return "piano sonata;sonata";
Chris@0 48 if (group == "XVII") return "work for piano";
Chris@0 49 if (group == "XVIIa") return "work for piano";
Chris@0 50 if (group == "XVIII") return "piano concerto;concerto";
Chris@0 51 if (group == "XXII") return "mass";
Chris@0 52 if (group == "XXIIa") return "requiem";
Chris@0 53 //!!! choral works
Chris@0 54 return "";
Chris@0 55 }
Chris@0 56
Chris@0 57 void
Chris@0 58 HobokenImporter::import(QUrl source)
Chris@0 59 {
Chris@0 60 //!!! for now
Chris@0 61 QString filename = source.toLocalFile();
Chris@0 62
Chris@0 63 QFile file(filename);
Chris@0 64 if (!file.open(QFile::ReadOnly | QFile::Text)) {
Chris@0 65 throw std::exception();
Chris@0 66 }
Chris@0 67
Chris@0 68 QTextStream stream(&file);
Chris@0 69 stream.setCodec("UTF-8");
Chris@0 70
Chris@0 71 QString composerName = "Joseph Haydn";
Chris@0 72
Chris@0 73 DEBUG << "composerName = " << composerName << endl;
Chris@0 74
Chris@0 75 QMap<QString, Work *> hobMap;
Chris@0 76 QMap<int, Work *> opusMap;
Chris@0 77
Chris@0 78 while (!stream.atEnd()) {
Chris@0 79
Chris@0 80 QString line = stream.readLine();
Chris@0 81
Chris@0 82 QString hob = "";
Chris@0 83
Chris@0 84 QRegExp hobre("^([\\d][^ _]+_([A-Za-z]*)[^ ]+) ");
Chris@0 85
Chris@0 86 if (hobre.indexIn(line) >= 0) {
Chris@0 87
Chris@0 88 hob = hobre.cap(1);
Chris@0 89 Work *w = 0;
Chris@0 90 Composition *cn = 0;
Chris@0 91
Chris@0 92 if (!hobMap.contains(hob)) {
Chris@0 93 w = new Work();
Chris@0 94 QString key = hobre.cap(2);
Chris@0 95 if (key != "") {
Chris@0 96 if (key.length() > 1 && key[1] == 's') {
Chris@0 97 key = key[0] + "-flat";
Chris@0 98 }
Chris@0 99 if (key[0].isLower()) {
Chris@0 100 key[0] = key[0].toUpper();
Chris@0 101 key += " minor";
Chris@0 102 } else {
Chris@0 103 key += " major";
Chris@0 104 }
Chris@0 105 w->setKey(key);
Chris@0 106 }
Chris@0 107 cn = new Composition();
Chris@0 108 cn->setComposerName(composerName);
Chris@0 109 cn->addWork(w);
Chris@0 110 w->setComposition(cn);
Chris@0 111 hobMap[hob] = w;
Chris@0 112 } else {
Chris@0 113 w = hobMap[hob];
Chris@0 114 cn = w->composition();
Chris@0 115 }
Chris@0 116
Chris@0 117 QRegExp hobre2("^[^ ]+ # (Hob [^ ]*)");
Chris@0 118 if (hobre2.indexIn(line) >= 0) {
Chris@0 119 QString hobtext = hobre2.cap(1);
Chris@0 120 w->setCatalogue(hobtext);
Chris@0 121 QStringList forms = hobToForm(hobtext).split(";");
Chris@0 122 foreach (QString f, forms) {
Chris@0 123 if (f != "") {
Chris@0 124 w->addForm(Form::getFormByName(f));
Chris@0 125 }
Chris@0 126 }
Chris@0 127 continue;
Chris@0 128 }
Chris@0 129
Chris@0 130 QRegExp titlere("^[^ ]+ @([^ ]+) (.*)");
Chris@0 131 if (titlere.indexIn(line) >= 0) {
Chris@0 132 QString title = titlere.cap(2).trimmed();
Chris@0 133 if (titlere.cap(1) == "en") {
Chris@0 134 if (w->name() != "") {
Chris@0 135 w->addAlias(w->name());
Chris@0 136 }
Chris@0 137 w->setName(title);
Chris@0 138 } else {
Chris@0 139 if (w->name() == "") {
Chris@0 140 w->setName(title);
Chris@0 141 } else {
Chris@0 142 w->addAlias(title);
Chris@0 143 }
Chris@0 144 }
Chris@0 145 continue;
Chris@0 146 }
Chris@0 147
Chris@0 148 QRegExp httpre("^[^ ]+ (http:[^ ]*) *$");
Chris@0 149 if (httpre.indexIn(line) >= 0) {
Chris@0 150 QString url = httpre.cap(1).trimmed();
Chris@0 151 Document *d = new Document;
Chris@18 152 d->setUri(Uri(url));
Chris@0 153 d->setTopic(w);
Chris@0 154 if (url.contains("wikipedia")) d->setSiteName("Wikipedia");
Chris@0 155 else if (url.contains("klassika.info")) {
Chris@0 156 d->setSiteName("Klassika - Die deutschsprachigen Klassikseiten");
Chris@0 157 }
Chris@0 158 w->addPage(d);
Chris@0 159 continue;
Chris@0 160 }
Chris@0 161
Chris@0 162 QRegExp datere("^[^ ]+ \\[[^]]*(\\d{4})[^]]*\\]");
Chris@0 163 if (datere.indexIn(line) >= 0) {
Chris@0 164 cn->setYear(datere.cap(1).toInt());
Chris@0 165 continue;
Chris@0 166 }
Chris@0 167
Chris@0 168 QRegExp opre("^[^ ]+ -> ([^ ]+)");
Chris@0 169 if (opre.indexIn(line) >= 0) {
Chris@0 170 QString optext = opre.cap(1);
Chris@0 171 w->setOpus(optext);
Chris@0 172 if (optext.contains('/')) {
Chris@0 173 QStringList ops = optext.split('/');
Chris@0 174 int opno = ops[0].toInt();
Chris@0 175 if (opno == 0) {
Chris@0 176 DEBUG << "Failed to convert " << optext << " to op no" << endl;
Chris@0 177 } else {
Chris@0 178 if (!opusMap.contains(opno)) {
Chris@0 179 opusMap[opno] = new Work();
Chris@0 180 opusMap[opno]->setOpus(ops[0]);
Chris@0 181 opusMap[opno]->setComposition(new Composition());
Chris@0 182 opusMap[opno]->composition()->setComposerName(composerName);
Chris@0 183 }
Chris@0 184 opusMap[opno]->addPart(w);
Chris@0 185 w->setPartOf(opusMap[opno]);
Chris@0 186 w->setOpus(ops[0]);
Chris@0 187 w->setNumber(ops[1]);
Chris@0 188 }
Chris@0 189 }
Chris@0 190 continue;
Chris@0 191 }
Chris@0 192
Chris@0 193 continue;
Chris@0 194 }
Chris@0 195
Chris@0 196 QRegExp opre("^Opus ([\\d][^ ]*): (.*)");
Chris@0 197 if (opre.indexIn(line) >= 0) {
Chris@0 198 QString optext = opre.cap(1);
Chris@0 199 int opno = optext.toInt();
Chris@0 200 if (!opusMap.contains(opno)) {
Chris@0 201 opusMap[opno] = new Work();
Chris@0 202 opusMap[opno]->setOpus(optext);
Chris@0 203 opusMap[opno]->setComposition(new Composition());
Chris@0 204 opusMap[opno]->composition()->setComposerName(composerName);
Chris@0 205 }
Chris@0 206 QString title = opre.cap(2);
Chris@0 207 title.replace("<br>", " - ");
Chris@0 208 opusMap[opno]->setName(title);
Chris@0 209 continue;
Chris@0 210 }
Chris@0 211
Chris@0 212 DEBUG << "Failed to match line: " << line << endl;
Chris@0 213 }
Chris@0 214
Chris@0 215 foreach (Work *w, hobMap) m_objects.push_back(w);
Chris@0 216 foreach (Work *w, opusMap) m_objects.push_back(w);
Chris@0 217
Chris@0 218
Chris@0 219 DEBUG << "Found " << m_objects.size() << " things" << endl;
Chris@0 220 }
Chris@0 221
Chris@0 222
Chris@0 223 }
Chris@0 224
Chris@0 225