Mercurial > hg > classical
changeset 0:e8f4c2b55fd8 classical-rdf
* reorganise
author | Chris Cannam |
---|---|
date | Tue, 01 Dec 2009 17:50:41 +0000 |
parents | |
children | 29ca5974905d |
files | common/Objects.cpp common/Objects.h common/common.pro import/ImportClassicalComposersOrg.cpp import/ImportClassicalComposersOrg.h import/ImportClassicalDotNet.cpp import/ImportClassicalDotNet.h import/ImportHoboken.cpp import/ImportHoboken.h import/ImportWikipediaComposers.cpp import/ImportWikipediaComposers.h import/ImportWikipediaWorks.cpp import/ImportWikipediaWorks.h import/ImportWikipediaWorksK.cpp import/ImportWikipediaWorksK.h import/ImportWikipediaWorksList.cpp import/ImportWikipediaWorksList.h import/Importer.h import/Test.cpp import/importers.ttl |
diffstat | 20 files changed, 4613 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/common/Objects.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,183 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "Objects.h" + +#include <dataquay/Debug.h> + +#include <cstdlib> +#include <iostream> + +namespace ClassicalData { + +QMap<QString, Form *> Form::m_map; +QMutex Form::m_mutex; + +QString +Composer::getSortName(bool caps) const +{ + QString n = name(); + QStringList pl = n.split(QRegExp(", *")); + if (pl.size() == 1) { + QStringList pl2; + pl = n.split(' '); + pl2.push_back(pl[pl.size()-1]); + pl2.push_back(""); + for (int i = 0; i+1 < pl.size(); ++i) { + if (i > 0) pl2[1] += " "; + pl2[1] += pl[i]; + } + pl = pl2; + } + if (caps) { + n = pl[0].toUpper(); + } else { + n = pl[0]; + } + for (int i = 1; i < pl.size(); ++i) { + n += ", "; + n += pl[i]; + } + return n; +} + +QString +Composer::getDisplayDates() const +{ + QString s; + if (birth() || death()) { + bool showApprox = false; + if ((birth() && birth()->approximate()) || + (death() && death()->approximate())) { + showApprox = true; + } + if (birth()) { + if (birth()->place() != "") { + s += birth()->place() + ", "; + } + if (showApprox) { + s += "c. "; + showApprox = false; + } + s += QString("%1").arg(birth()->year()); + } + s += "-"; + if (death()) { + if (death()->place() != "") { + s += death()->place() + ", "; + } + if (showApprox) { + s += "c. 
"; + showApprox = false; + } + s += QString("%1").arg(death()->year()); + } + } + + return s; +} + +static int +compare(QString a, QString b) +{ + if (a < b) { + return -1; + } else if (a > b) { + return 1; + } else { + return 0; + } +} + +static int +compareNumericTexts(QString a, QString b) +{ +// std::cout << "compare " << a.toStdString() +// << " " << b.toStdString() << std::endl; + + if (a == b) return 0; + + if (!a[0].isDigit()) { + if (!b[0].isDigit()) { + QStringList al = a.split(QRegExp("[ :-]")); + QStringList bl = b.split(QRegExp("[ :-]")); + if (al.size() < 2 || bl.size() < 2 || + al.size() != bl.size()) { + if (a < b) return -1; + else if (a > b) return 1; + else return 0; + } + for (int i = 0; i < al.size(); ++i) { + if (al[i] != bl[i]) { +// std::cout << "subcompare " << al[i].toStdString() +// << " " << bl[i].toStdString() << std::endl; + return compareNumericTexts(al[i], bl[i]); + } + } + } else { + return compare(a, b); + } + } else { + if (!b[0].isDigit()) { + return compare(a, b); + } + } + + // use atoi instead of toInt() because we want it to succeed even + // if the text is not only an integer (e.g. 
35a) + int aoi = atoi(a.toLocal8Bit().data()); + int boi = atoi(b.toLocal8Bit().data()); + +// std::cout << "aoi = " << aoi << ", boi = " << boi << std::endl; + + if (aoi == boi) return compare(a, b); + else return aoi - boi; +} + +bool +Work::Ordering::operator()(Work *a, Work *b) +{ + if (!a) { + if (!b) return false; + else return true; + } else { + if (!b) { + return false; + } + } +/* + QString ao = a->catalogue(); + if (ao == "") ao = a->opus(); + + QString bo = b->catalogue(); + if (bo == "") bo = b->opus(); + + std::cout << "ao " << ao.toStdString() << ", bo " << bo.toStdString() << std::endl; +*/ + int c = 0; + if (a->catalogue() != "" && b->catalogue() != "") { + c = compareNumericTexts(a->catalogue(), b->catalogue()); + } + if (c == 0 && a->opus() != "" && b->opus() != "") { + c = compareNumericTexts(a->opus(), b->opus()); + } + if (c == 0 && a->partOf() == b->partOf() && + a->number() != "" && b->number() != "") { + c = compareNumericTexts(a->number(), b->number()); + } + + bool rv = false; + + if (c == 0) { + if (a->name() == b->name()) rv = (a < b); + else rv = (a->name() < b->name()); + } else { + rv = (c < 0); + } + +// std::cout << "result = " << rv << std::endl; + return rv; +} + + +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/common/Objects.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,350 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _CLASSICAL_DATA_OBJECTS_H_ +#define _CLASSICAL_DATA_OBJECTS_H_ + +#include <QObject> +#include <QMetaType> +#include <QString> +#include <QStringList> +#include <QSharedPointer> +#include <QUrl> +#include <QSet> +#include <QMutex> +#include <QMutexLocker> +#include <QMap> + +namespace ClassicalData { + +class HistoricalEvent : public QObject +{ + Q_OBJECT + + Q_PROPERTY(int year READ year WRITE setYear STORED true) + Q_PROPERTY(QString place READ place WRITE setPlace STORED true) + Q_PROPERTY(bool approximate READ approximate WRITE setApproximate STORED true) + +public: + HistoricalEvent() : m_year(0), m_place(), m_approximate(false) { } + HistoricalEvent(int y) : m_year(y), m_approximate(false) { } + HistoricalEvent(int y, QString p) : m_year(y), m_place(p), m_approximate(false) { } + + int year() const { return m_year; } + void setYear(int y) { m_year = y; } + QString place() const { return m_place; } + void setPlace(QString p) { m_place = p; } + bool approximate() const { return m_approximate; } + void setApproximate(bool a) { m_approximate = a; } + +private: + int m_year; + QString m_place; + bool m_approximate; +}; + +class Birth : public HistoricalEvent +{ + Q_OBJECT + +public: + Birth() : HistoricalEvent() { } + Birth(int y) : HistoricalEvent(y) { } + Birth(int y, QString p) : HistoricalEvent(y, p) { } +}; + +class Death : public HistoricalEvent +{ + Q_OBJECT + +public: + Death() : HistoricalEvent() { } + Death(int y) : HistoricalEvent(y) { } + Death(int y, QString p) : HistoricalEvent(y, p) { } +}; + +class Composer; +class Work; + +class Composition : public HistoricalEvent +{ + Q_OBJECT + + Q_PROPERTY(ClassicalData::Composer *composer READ composer WRITE setComposer STORED true) + Q_PROPERTY(QSet<ClassicalData::Work *> works READ works WRITE setWorks 
STORED true) + Q_PROPERTY(QString composerName READ composerName WRITE setComposerName STORED false) + +public: + Composition() : HistoricalEvent(), m_composer(0) { } + Composition(int y) : HistoricalEvent(y), m_composer(0) { } + Composition(int y, QString p) : HistoricalEvent(y, p), m_composer(0) { } + + Composer *composer() { return m_composer; } + void setComposer(Composer *c) { m_composer = c; } + + QSet<Work *> works() { return m_works; } + void setWorks(QSet<Work *> c) { m_works = c; } + void addWork(Work *w) { m_works.insert(w); } + + // Not a storable property, set temporarily while composer record is found + QString composerName() const { return m_cname; } + void setComposerName(QString n) { m_cname = n; } + +private: + Composer *m_composer; + QSet<Work *> m_works; + QString m_cname; +}; + +class Document : public QObject +{ + Q_OBJECT + + Q_PROPERTY(QUrl uri READ uri WRITE setUri STORED true) + Q_PROPERTY(QString siteName READ siteName WRITE setSiteName STORED true) + Q_PROPERTY(QObject *topic READ topic WRITE setTopic STORED true) + +public: + Document(QObject *parent = 0) : QObject(parent), m_topic(0) { } + + QUrl uri() const { return m_uri; } + void setUri(QUrl uri) { m_uri = uri; } + + QString siteName() const { return m_siteName; } + void setSiteName(QString n) { m_siteName = n; } + + QObject *topic() const { return m_topic; } + void setTopic(QObject *t) { m_topic = t; } + +private: + QUrl m_uri; + QString m_siteName; + QObject *m_topic; +}; + + +class NamedEntity : public QObject +{ + Q_OBJECT + + Q_PROPERTY(QString name READ name WRITE setName STORED true) + Q_PROPERTY(QSet<QString> aliases READ aliases WRITE setAliases STORED true) + Q_PROPERTY(QString remarks READ remarks WRITE setRemarks STORED true) + Q_PROPERTY(QSet<ClassicalData::Document*> pages READ pages WRITE setPages STORED true) + +public: + NamedEntity(QObject *parent = 0) : QObject(parent) { } + + QString name() const { return m_name; } + void setName(QString n) { m_name = n; } + + 
QString remarks() const { return m_remarks; } + void setRemarks(QString n) { m_remarks = n; } + + QSet<QString> aliases() const { return m_aliases; } + void setAliases(QSet<QString> l) { m_aliases = l; } + void addAlias(QString a) { m_aliases.insert(a); } + + QSet<Document *> pages() const { return m_pages; } + void addPage(Document *p) { m_pages.insert(p); } + void setPages(QSet<Document *> p) { m_pages = p; } //!!! destroy old ones? do we own? + +private: + QString m_name; + QString m_remarks; + QSet<QString> m_aliases; + QSet<Document *> m_pages; +}; + +class Movement; + +class Form; + +class Work : public NamedEntity +{ + Q_OBJECT + + Q_PROPERTY(QString key READ key WRITE setKey STORED true) + Q_PROPERTY(QString opus READ opus WRITE setOpus STORED true) + Q_PROPERTY(QString catalogue READ catalogue WRITE setCatalogue STORED true) + Q_PROPERTY(QString number READ number WRITE setNumber STORED true) + Q_PROPERTY(QSet<ClassicalData::Form*> forms READ forms WRITE setForms STORED true) + Q_PROPERTY(ClassicalData::Work* partOf READ partOf WRITE setPartOf STORED true) + Q_PROPERTY(QSet<ClassicalData::Work*> parts READ parts WRITE setParts STORED true) + Q_PROPERTY(QSet<ClassicalData::Movement*> movements READ movements WRITE setMovements STORED true) + Q_PROPERTY(ClassicalData::Composition *composition READ composition WRITE setComposition STORED true) + +public: + Work(QObject *parent = 0) : NamedEntity(parent), m_partOf(0), m_composition(0) { } + + QString key() const { return m_key; } + void setKey(QString n) { m_key = n; } + + QString opus() const { return m_opus; } + void setOpus(QString n) { m_opus = n; } + + QString catalogue() const { return m_catalogue; } + void setCatalogue(QString n) { m_catalogue = n; } + + QString number() const { return m_number; } + void setNumber(QString n) { m_number = n; } + + QSet<Form *> forms() const { return m_forms; } + void setForms(QSet<Form *> f) { m_forms = f; } + void addForm(Form *f) { m_forms.insert(f); } + + Work 
*partOf() const { return m_partOf; } + void setPartOf(Work *w) { m_partOf = w; } + + QSet<Work *> parts() const { return m_parts; } + void setParts(QSet<Work *> l) { m_parts = l; } + void addPart(Work *w) { m_parts.insert(w); } + + QSet<Movement *> movements() const { return m_movements; } + void setMovements(QSet<Movement *> l) { m_movements = l; } + void addMovement(Movement *w) { m_movements.insert(w); } + + Composition *composition() { return m_composition; } + const Composition *composition() const { return m_composition; } + void setComposition(Composition *c) { m_composition = c; } + + struct Ordering { + bool operator()(Work *, Work *); + }; + +private: + QString m_key; + QString m_opus; + QString m_catalogue; + QString m_number; + QSet<Form *> m_forms; + Work *m_partOf; + QSet<Work *> m_parts; + QSet<Movement *> m_movements; + Composition *m_composition; +}; + +class Movement : public NamedEntity +{ + Q_OBJECT + + Q_PROPERTY(QString key READ key WRITE setKey STORED true) + Q_PROPERTY(QString number READ number WRITE setNumber STORED true) + Q_PROPERTY(ClassicalData::Work* partOf READ partOf WRITE setPartOf STORED true) + Q_PROPERTY(QSet<ClassicalData::Movement*> parts READ parts WRITE setParts STORED true) // movements can be nested + Q_PROPERTY(ClassicalData::Composition *composition READ composition WRITE setComposition STORED true) + +public: + Movement(QObject *parent = 0) : NamedEntity(parent), m_partOf(0), m_composition(0) { } + + QString key() const { return m_key; } + void setKey(QString n) { m_key = n; } + + QString number() const { return m_number; } + void setNumber(QString n) { m_number = n; } + + Work *partOf() const { return m_partOf; } + void setPartOf(Work *w) { m_partOf = w; } + + QSet<Movement *> parts() const { return m_parts; } + void setParts(QSet<Movement *> l) { m_parts = l; } + void addPart(Movement *w) { m_parts.insert(w); } + + Composition *composition() { return m_composition; } + const Composition *composition() const { return 
m_composition; } + void setComposition(Composition *c) { m_composition = c; } + +private: + QString m_key; + QString m_number; + Work *m_partOf; + QSet<Movement *> m_parts; + Composition *m_composition; +}; + +class Composer : public NamedEntity +{ + Q_OBJECT + + Q_PROPERTY(QString gender READ gender WRITE setGender STORED true) + Q_PROPERTY(QString nationality READ nationality WRITE setNationality STORED true) + Q_PROPERTY(QString period READ period WRITE setPeriod STORED true) + Q_PROPERTY(ClassicalData::Birth *birth READ birth WRITE setBirth STORED true) + Q_PROPERTY(ClassicalData::Death *death READ death WRITE setDeath STORED true) + +public: + Composer(QObject *parent = 0) : NamedEntity(parent), m_birth(0), m_death(0) { } + + QString gender() const { return m_gender; } + void setGender(QString n) { m_gender = n; } + + QString nationality() const { return m_nationality; } + void setNationality(QString n) { m_nationality = n; } + + QString period() const { return m_period; } + void setPeriod(QString n) { m_period = n; } + + Birth *birth() { return m_birth; } + const Birth *birth() const { return m_birth; } + void setBirth(Birth *b) { m_birth = b; } + + Death *death() { return m_death; } + const Death *death() const { return m_death; } + void setDeath(Death *d) { m_death = d; } + + QString getSortName(bool caps) const; + QString getDisplayDates() const; + +private: + QString m_gender; + QString m_nationality; + QString m_period; + Birth *m_birth; + Death *m_death; +}; + +class Form : public NamedEntity +{ + Q_OBJECT + + Q_PROPERTY(QString uri READ uri) + +public: + Form(QObject *parent = 0) : NamedEntity(parent) { } + + static Form *getFormByName(QString name) { + QMutexLocker locker(&m_mutex); + if (!m_map.contains(name)) { + Form *f = new Form(); + f->setName(name); + m_map[name] = f; + } + return m_map[name]; + } + + QString uri() const { + return QString(":form_%1").arg(name()).toLower().replace(' ', '_'); + } + +private: + static QMap<QString, Form *> m_map; 
+ static QMutex m_mutex; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::HistoricalEvent*); +Q_DECLARE_METATYPE(ClassicalData::Birth*); +Q_DECLARE_METATYPE(ClassicalData::Death*); +Q_DECLARE_METATYPE(ClassicalData::Composition*); +Q_DECLARE_METATYPE(ClassicalData::Work*); +Q_DECLARE_METATYPE(ClassicalData::Movement*); +Q_DECLARE_METATYPE(ClassicalData::Document*); +Q_DECLARE_METATYPE(QSet<QString>); +Q_DECLARE_METATYPE(QSet<ClassicalData::Work*>); +Q_DECLARE_METATYPE(QSet<ClassicalData::Movement*>); +Q_DECLARE_METATYPE(QSet<ClassicalData::Document*>); +Q_DECLARE_METATYPE(ClassicalData::Composer*); +Q_DECLARE_METATYPE(ClassicalData::Form*); +Q_DECLARE_METATYPE(QSet<ClassicalData::Form*>); + +#endif
# ---- common/common.pro (new file in this changeset) ----
# qmake project for the "common" code: builds Objects.cpp/Objects.h as a
# static library with debug settings.
TEMPLATE = lib
TARGET = 
CONFIG += debug staticlib
DEPENDPATH += .
# NOTE(review): ../../turbot is presumably where the <dataquay/...> headers
# included by Objects.cpp are found -- confirm against the checkout layout.
INCLUDEPATH += . ../../turbot

# Keep generated object files and moc output out of the source directory.
OBJECTS_DIR = o
MOC_DIR = m

# Input
HEADERS += Objects.h
SOURCES += Objects.cpp

# Solaris debug builds only: pass -xprofile=tcov to both compiler and linker
# (looks like a coverage-profiling option of the Solaris compiler -- confirm).
solaris* {

    debug {
        QMAKE_CXXFLAGS_DEBUG += -xprofile=tcov
        LIBS += -xprofile=tcov
    }

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportClassicalComposersOrg.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,337 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportClassicalComposersOrg.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +ClassicalComposersOrgImporter::setSource(QUrl source) +{ + DEBUG << "ClassicalComposersOrgImporter::setSource: " << source << endl; + import(source); +} + +typedef QMap<QString, int> NameMap; + +void +parseNames(QString field, NameMap &names, int score = 0) +{ + QString a(field), b(field); + + int mp; + QRegExp re; + + /* classical-composers.org uses quite a few (not always + * consistent) ways to indicate alternatives in composer + * names. Not all of them are distinguishable. + * Examples: + * + * Pipe used to separate sorted surname from alternative for whole: + * Hardin | Moondog, Louis Thomas + * -> "Louis Thomas Hardin", "Moondog" + * Barron | Charlotte May Wind, Bebe + * -> "Bebe Barron", "Charlotte May Wind" + * + * Pipe used to separate alternatives for surname only (seems + * slightly more common than the previous one; if there is only + * one word between the pipe and a following comma, I'd be + * inclined to assume this case, Moondog notwithstanding): + * Mendelssohn | Hensel, Fanny Cécile + * -> "Fanny Cécile Mendelssohn", "Fanny Cécile Hensel" + * Erskine, 6th Earl of Kellie | Kelly, Thomas Alexander + * -> "Thomas Alexander Erskine, 6th Earl of Kellie", + * "Thomas Alexander Kelly" + * + * Round brackets used to indicate one or more alternatives for + * prior word; slash for alternation: + * Edelmann, Jean-Frédéric (Johann-Friedrich) + * -> "Jean-Frédéric Edelmann", "Johann-Friedrich Edelmann" + * Eberwein, Max (Traugott Maximilian) + * -> "Max Eberwein", "Traugott 
Maximilian Eberwein" + * Mahaut | Mahault | Mahoti | Mahout, Antoine (Anton/Antonio) + * -> "Antoine Mahaut", "Antoine Mahault", "Antoine Mahoti", + * "Antoine Mahout", "Anton Mahaut", "Anton Mahault", + * "Anton Mahoti", "Anton Mahout", "Antonio Mahaut", + * "Antonio Mahault", "Antonio Mahoti", "Antonio Mahout" + * + * Round brackets used to indicate alternative to prior + * names, with some meaning left implicit: + * Kaan | Kaan-Albest, Jindrich z Albestu (Heinrich) + * -> "Jindrich z Albestu Kaan", "Heinrich Kaan-Albest", + * perhaps "Heinrich Kaan" (but not "Jindrich z Albestu + * Kaan-Albest") + * + * Round brackets used to augment rather than + * alternate. Probably can't identify this reliably, though + * round brackets used somewhere other than at end of line + * are relatively likely to be this form (?): + * Linley (the elder), Thomas + * -> "Thomas Linley", "Thomas Linley the elder" + * Keys | Keyes, Ivor (Christopher Banfield) + * -> "Ivor Keys", "Ivor Keyes", "Ivor Christopher Banfield Keys", + * "Ivor Christopher Banfield Keyes" + * + * Square brackets used to indicate alternative for all + * forenames: + * Moller | Möller, John Christopher [Johann Christoph] + * -> "John Christopher Moller", "John Christopher Möller", + * "Johann Christoph Moller", "Johann Christoph Möller" + * + * Complicated examples: + * Mayr | Mayer, (Johann) Simon [Giovanni Simone] + * -> "Simon Mayr", "Simon Mayer", "Johann Simon Mayr", + * "Johann Simon Mayer", "Giovanni Simone Mayr", + * "Geovanni Simone Mayer" (but not "Johann Giovanni Simone Mayr") + * Frauenlob | Heinrich von Meissen + * -> "Heinrich Frauenlob", "Heinrich von Meissen", or + * perhaps "Frauenlob" (but not "Heinrich von Meissen Frauenlob") + */ + +// DEBUG << "parseNames: field = " << field << ", names contains " << names.size() << " item(s)" << endl; + + // round brackets used for augmentation right at the start + re = QRegExp("\\(([^\\)]+)\\) "); + if ((mp = re.indexIn(field)) >= 0) { + int ml = 
re.matchedLength(); + QString c(re.cap(1)); + a.replace(mp, ml, ""); + b.replace(mp, ml, QString("%1 ").arg(c)); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // round brackets used for augmentation directly after the comma + re = QRegExp(", \\(([^\\)]+)\\)"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + a.replace(mp, ml, ","); + b.replace(mp, ml, QString(", %1").arg(c)); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // round brackets used for augmentation directly before the comma + re = QRegExp(" \\(([^\\)]+)\\),"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + a.replace(mp, ml, ","); + b.replace(mp, ml, QString(" %1,").arg(c)); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // round brackets for alternation of single name, anywhere + re = QRegExp("([^\\[\\(, |]+) \\(([^\\)]+)\\)"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + QString d(re.cap(2)); + a.replace(mp, ml, c); + b.replace(mp, ml, d); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // square brackets for alternation of a series of names, at end or after pipe + re = QRegExp("([,|]) ([^\\[|,]+) \\[([^\\]]+)\\]"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString p(re.cap(1)); + QString c(re.cap(2)); + QString d(re.cap(3)); + a.replace(mp, ml, QString("%1 %2").arg(p).arg(c)); + b.replace(mp, ml, QString("%1 %2").arg(p).arg(d)); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // square brackets for alternation of a series of names, at start + re = QRegExp("^([^\\[,]+) \\[([^\\]]+)\\]"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + QString d(re.cap(2)); + a.replace(mp, ml, c); + b.replace(mp, ml, d); + 
parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // slash for alternation of word + re = QRegExp("([^ ,|]+)/([^ ,|]+)"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + QString d(re.cap(2)); + a.replace(mp, ml, c); + b.replace(mp, ml, d); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // pipe for alternation of surname + re = QRegExp("^(.*) \\| ([^|, ]+),"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + QString d(re.cap(2)); + a.replace(mp, ml, c + ","); + b.replace(mp, ml, d + ","); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // pipe for alternation of whole (before comma) + re = QRegExp("^(.*) \\| ([^|,]+),"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + QString d(re.cap(2)); + a.replace(mp, ml, c + ","); + b = d; + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // pipe for alternation of whole (at end) + re = QRegExp("^(.*) \\| ([^|,]+)$"); + if ((mp = re.indexIn(field)) >= 0) { + int ml = re.matchedLength(); + QString c(re.cap(1)); + QString d(re.cap(2)); + a.replace(mp, ml, c); + b.replace(mp, ml, d); + parseNames(a, names, score); + parseNames(b, names, score+1); + return; + } + + // comma + re = QRegExp("^(.+), ([^,]+)$"); + if ((mp = re.indexIn(field)) >= 0) { + QString c(re.cap(1)); + QString d(re.cap(2)); + parseNames(d + " " + c, names, score+1); + // fall through to add + } + + names[field] = score; +} + +void +ClassicalComposersOrgImporter::import(QUrl source) +{ + int i = 0; + + //!!! 
for now + QString filename = source.toLocalFile(); + + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + QString all = stream.readAll(); + + all.replace(QRegExp("^.*<div id=\"main\">"), ""); + + QRegExp matcher + ("<li><a href=\"([^\"]+)\">([^<]+)(<small>([^<]*)</small>)?</a> \\((\\*?)([0-9]+)[^0-9)]*([0-9]+)?\\)\\s*([^\\s])?</li>"); + + int pos = 0, count = 0; + while ((pos = matcher.indexIn(all, pos)) != -1) { + + pos += matcher.matchedLength(); + ++count; + + QString page = matcher.cap(1); + QString name = matcher.cap(2); + QString birth = matcher.cap(6); + QString death = matcher.cap(7); + QString female = matcher.cap(8); + + DEBUG << "Item " << count + << ": page = " << page + << ", name = " << name + << ", birth = " << birth + << ", death = " << death + << ", female = " << female; + + QString namefield = name.trimmed(); + NameMap names; + + parseNames(namefield, names); + + i = 0; + QString preferred; + foreach (QString n, names.keys()) { + if (preferred == "" || names[n] == 0) preferred = n; + DEBUG << "Name " << i << ": " << n << " score " << names[n] << endl; + ++i; + } + + if (names.empty()) { + DEBUG << "No name!" << endl; + continue; + } + + Composer *composer = new Composer(); + composer->setName(preferred); + foreach (QString n, names.keys()) { + if (n != preferred) composer->addAlias(n); + } + + if (page != "") { + Document *d = new Document; + d->setUri(QUrl("http://www.classical-composers.org" + page)); + d->setTopic(composer); + d->setSiteName("Classical Composers Database"); + composer->addPage(d); + } + if (birth != "") { + Birth *e = new Birth(birth.toInt()); + composer->setBirth(e); + } + if (death != "") { + composer->setDeath(new Death(death.toInt())); + } + if (female != "") { + composer->setGender("female"); + } + + m_objects.push_back(composer); + } + + DEBUG << "Found " << count << " things" << endl; + +} + + +} +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportClassicalComposersOrg.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_CLASSICAL_COMPOSERS_ORG_H_ +#define _IMPORT_CLASSICAL_COMPOSERS_ORG_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class ClassicalComposersOrgImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::ClassicalComposersOrgImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportClassicalDotNet.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportClassicalDotNet.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +ClassicalDotNetImporter::setSource(QUrl source) +{ + DEBUG << "ClassicalDotNetImporter::setSource: " << source << endl; + import(source); +} + +void +parseNames(QString field, QStringList &names) +{ + field.replace("Ä", QChar(0x00C4)); // LATIN CAPITAL LETTER A WITH DIAERESIS + field.replace("ł", QChar(0x0142)); // LATIN SMALL LETTER L WITH STROKE + field.replace("Ř", QChar(0x0158)); // LATIN CAPITAL LETTER R WITH CARON + + field.replace("á", QChar(0x00E1)); + field.replace("Á", QChar(0x00C1)); + field.replace("ç", QChar(0x00E7)); + field.replace("é", QChar(0x00E9)); + field.replace("É", QChar(0x00C9)); + field.replace("È", QChar(0x00C8)); + field.replace("Ë", QChar(0x00CB)); + field.replace("í", QChar(0x00ED)); + field.replace("Ï", QChar(0x00CF)); + field.replace("Ñ", QChar(0x00D1)); + field.replace("Ó", QChar(0x00D3)); + field.replace("Ô", QChar(0x00D4)); + field.replace("ò", QChar(0x00F2)); + field.replace("ö", QChar(0x00F6)); + field.replace("Ÿ", QChar(0x0178)); + + if (field.contains(QRegExp("&[^ ]+;"))) { + DEBUG << "Failed to handle entity in " << field << endl; + } + + // all-caps -> titlecase + QRegExp re("[A-Z][^ ,]*[A-Z][^,]+"); + int mp = re.indexIn(field); + if (mp >= 0) { + int ml = re.matchedLength(); + bool initial = true; + for (int i = 0; i < ml; ++i) { + if (initial) { + initial = false; + continue; + } + if (field[mp + i].isUpper()) { + field[mp + i] = field[mp + i].toLower(); + } else if (field[mp + i].isSpace()) { + initial = true; + } + } + } + + field = field.trimmed(); + 
names.push_back(field); + + // comma + re = QRegExp("^([^,]+), ([^,]+)$"); + if ((mp = re.indexIn(field)) >= 0) { + QString c(re.cap(1)); + QString d(re.cap(2)); + names.push_back(d + " " + c); + return; + } +} + +void +ClassicalDotNetImporter::import(QUrl source) +{ + //!!! for now + QString filename = source.toLocalFile(); + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + QString all = stream.readAll(); + + all.replace(QRegExp("^.*<div id=\"center\">"), ""); + + QRegExp matcher + ("<li><a href=\"([^\"]+)\">([^<]+)</a></li>"); + + int pos = 0, count = 0; + while ((pos = matcher.indexIn(all, pos)) != -1) { + pos += matcher.matchedLength(); + ++count; + + DEBUG << "Item " << count + << ": page = " << matcher.cap(1) + << ", name = " << matcher.cap(2); + + QString namefield = matcher.cap(2); + QStringList names; + + parseNames(namefield, names); + if (names.empty()) { + DEBUG << "No name!" << endl; + continue; + } + + if (names[0].contains(" Collections")) { + continue; + } + + Composer *composer = new Composer(); + composer->setName(names[0]); + for (int i = 1; i < names.size(); ++i) { + composer->addAlias(names[i]); + } + + if (matcher.cap(1) != "") { + QString url = matcher.cap(1); + url.replace(QRegExp("^\\.\\./"), "/music/"); + Document *d = new Document; + d->setUri(QUrl("http://www.classical.net" + url)); + d->setTopic(composer); + d->setSiteName("Classical Net"); + composer->addPage(d); + } + + m_objects.push_back(composer); + } + + + DEBUG << "Found " << count << " things" << endl; +} + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportClassicalDotNet.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_CLASSICAL_DOT_NET_H_ +#define _IMPORT_CLASSICAL_DOT_NET_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class ClassicalDotNetImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::ClassicalDotNetImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportHoboken.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,225 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportHoboken.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +HobokenImporter::setSource(QUrl source) +{ + DEBUG << "HobokenImporter::setSource: " << source << endl; + import(source); +} + +QString +hobToForm(QString hob) +{ + QStringList bits = hob.split(':'); + QString group = bits[0]; + int num = bits[1].toInt(); + if (group == "I") return "symphony"; + if (group == "II" && (num <= 24 || !bits[1][0].isDigit())) return "divertimento"; + if (group == "III") return "string quartet"; + if (group == "IV") return "divertimento"; + if (group == "V") return "string trio;trio"; + if (group == "VI") return "string duo;duo;sonata"; + if (group == "VII") return "concerto"; + if (group == "VIII") return "march"; + if (group == "IX") return "dance"; + if (group == "X") return "divertimento"; + if (group == "XI") return "trio"; + if (group == "XII") return "duo"; + if (group == "XIII") return "concerto"; + if (group == "XIV") return "divertimento"; + if (group == "XV") return "piano trio;trio"; + if (group == "XVI") return "piano sonata;sonata"; + if (group == "XVII") return "work for piano"; + if (group == "XVIIa") return "work for piano"; + if (group == "XVIII") return "piano concerto;concerto"; + if (group == "XXII") return "mass"; + if (group == "XXIIa") return "requiem"; + //!!! choral works + return ""; +} + +void +HobokenImporter::import(QUrl source) +{ + //!!! 
for now + QString filename = source.toLocalFile(); + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + + QString composerName = "Joseph Haydn"; + + DEBUG << "composerName = " << composerName << endl; + + QMap<QString, Work *> hobMap; + QMap<int, Work *> opusMap; + + while (!stream.atEnd()) { + + QString line = stream.readLine(); + + QString hob = ""; + + QRegExp hobre("^([\\d][^ _]+_([A-Za-z]*)[^ ]+) "); + + if (hobre.indexIn(line) >= 0) { + + hob = hobre.cap(1); + Work *w = 0; + Composition *cn = 0; + + if (!hobMap.contains(hob)) { + w = new Work(); + QString key = hobre.cap(2); + if (key != "") { + if (key.length() > 1 && key[1] == 's') { + key = key[0] + "-flat"; + } + if (key[0].isLower()) { + key[0] = key[0].toUpper(); + key += " minor"; + } else { + key += " major"; + } + w->setKey(key); + } + cn = new Composition(); + cn->setComposerName(composerName); + cn->addWork(w); + w->setComposition(cn); + hobMap[hob] = w; + } else { + w = hobMap[hob]; + cn = w->composition(); + } + + QRegExp hobre2("^[^ ]+ # (Hob [^ ]*)"); + if (hobre2.indexIn(line) >= 0) { + QString hobtext = hobre2.cap(1); + w->setCatalogue(hobtext); + QStringList forms = hobToForm(hobtext).split(";"); + foreach (QString f, forms) { + if (f != "") { + w->addForm(Form::getFormByName(f)); + } + } + continue; + } + + QRegExp titlere("^[^ ]+ @([^ ]+) (.*)"); + if (titlere.indexIn(line) >= 0) { + QString title = titlere.cap(2).trimmed(); + if (titlere.cap(1) == "en") { + if (w->name() != "") { + w->addAlias(w->name()); + } + w->setName(title); + } else { + if (w->name() == "") { + w->setName(title); + } else { + w->addAlias(title); + } + } + continue; + } + + QRegExp httpre("^[^ ]+ (http:[^ ]*) *$"); + if (httpre.indexIn(line) >= 0) { + QString url = httpre.cap(1).trimmed(); + Document *d = new Document; + d->setUri(url); + d->setTopic(w); + if (url.contains("wikipedia")) 
d->setSiteName("Wikipedia"); + else if (url.contains("klassika.info")) { + d->setSiteName("Klassika - Die deutschsprachigen Klassikseiten"); + } + w->addPage(d); + continue; + } + + QRegExp datere("^[^ ]+ \\[[^]]*(\\d{4})[^]]*\\]"); + if (datere.indexIn(line) >= 0) { + cn->setYear(datere.cap(1).toInt()); + continue; + } + + QRegExp opre("^[^ ]+ -> ([^ ]+)"); + if (opre.indexIn(line) >= 0) { + QString optext = opre.cap(1); + w->setOpus(optext); + if (optext.contains('/')) { + QStringList ops = optext.split('/'); + int opno = ops[0].toInt(); + if (opno == 0) { + DEBUG << "Failed to convert " << optext << " to op no" << endl; + } else { + if (!opusMap.contains(opno)) { + opusMap[opno] = new Work(); + opusMap[opno]->setOpus(ops[0]); + opusMap[opno]->setComposition(new Composition()); + opusMap[opno]->composition()->setComposerName(composerName); + } + opusMap[opno]->addPart(w); + w->setPartOf(opusMap[opno]); + w->setOpus(ops[0]); + w->setNumber(ops[1]); + } + } + continue; + } + + continue; + } + + QRegExp opre("^Opus ([\\d][^ ]*): (.*)"); + if (opre.indexIn(line) >= 0) { + QString optext = opre.cap(1); + int opno = optext.toInt(); + if (!opusMap.contains(opno)) { + opusMap[opno] = new Work(); + opusMap[opno]->setOpus(optext); + opusMap[opno]->setComposition(new Composition()); + opusMap[opno]->composition()->setComposerName(composerName); + } + QString title = opre.cap(2); + title.replace("<br>", " - "); + opusMap[opno]->setName(title); + continue; + } + + DEBUG << "Failed to match line: " << line << endl; + } + + foreach (Work *w, hobMap) m_objects.push_back(w); + foreach (Work *w, opusMap) m_objects.push_back(w); + + + DEBUG << "Found " << m_objects.size() << " things" << endl; +} + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportHoboken.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_HOBOKEN_H_ +#define _IMPORT_HOBOKEN_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class HobokenImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::HobokenImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaComposers.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,225 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportWikipediaComposers.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +WikipediaComposersImporter::setSource(QUrl source) +{ + DEBUG << "WikipediaComposersImporter::setSource: " << source << endl; + import(source); +} + +Composer * +addComposer(QString namefield, QString birthfield, QString deathfield, + QString datesfield, QString nationalityfield, QString worksfield, + QString summaryfield) +{ + namefield = namefield.trimmed(); + birthfield = birthfield.trimmed(); + deathfield = deathfield.trimmed(); + datesfield = datesfield.trimmed(); + nationalityfield = nationalityfield.trimmed(); + worksfield = worksfield.trimmed(); + summaryfield = summaryfield.trimmed(); + + Composer *composer = new Composer(); + + QString name = namefield; + name.replace("[[", ""); + name.replace("]]", ""); + QString pagename = name; + + if (name.contains('|')) { + QStringList bits = name.split('|'); + pagename = bits[0]; + name = bits[1]; + } + + composer->setName(name); + + pagename.replace(" ", "_"); + QUrl url; + url.setScheme("http"); + url.setHost("en.wikipedia.org"); + + url.setPath("/wiki/" + QUrl::toPercentEncoding(pagename)); + Document *d = new Document; + d->setUri(url); + d->setSiteName("Wikipedia"); + d->setTopic(composer); + composer->addPage(d); + + if (datesfield.contains("fl.")) datesfield = ""; // "flourished", meaningless for us at the moment + + bool approx = (datesfield.contains("c.") || datesfield.contains("?") + || datesfield.contains("before") || datesfield.contains("after")); + + if (datesfield != "") { + DEBUG << "dates for " << name << ": " << datesfield 
<< endl; + datesfield.replace("(", ""); + datesfield.replace(")", ""); + datesfield.replace(" ", ""); + datesfield.replace(QString::fromUtf8("\342\200\222"), "-"); + datesfield.replace(QString::fromUtf8("\342\200\223"), "-"); + datesfield.replace(QString::fromUtf8("\342\200\224"), "-"); + datesfield.replace(QString::fromUtf8("\342\200\225"), "-"); + datesfield.replace("--", "-"); + DEBUG << "dates for " << name << ": " << datesfield << endl; + + QRegExp birthRe1("([0-9][0-9][0-9][0-9])(/[0-9]+|to[0-9]+)?-"); + QRegExp birthRe2("b\\.(c\\.|\\?|[a-z]+)?([0-9][0-9][0-9][0-9])(/[0-9]+|to[0-9]+)?"); + + if (birthRe1.indexIn(datesfield) >= 0) birthfield = birthRe1.cap(1); + else if (birthRe2.indexIn(datesfield) >= 0) birthfield = birthRe2.cap(2); + + QRegExp deathRe1("-(c\\.|\\?|[a-z]+)?([0-9][0-9][0-9][0-9])"); + QRegExp deathRe2("d\\.(c\\.|\\?|[a-z]+)?([0-9][0-9][0-9][0-9])"); + + if (deathRe1.indexIn(datesfield) >= 0) deathfield = deathRe1.cap(2); + else if (deathRe2.indexIn(datesfield) >= 0) deathfield = deathRe2.cap(2); + +// datesfield.replace(QRegExp("[^0-9]+"), "-"); +/* + QStringList list = datesfield.split('-'); + if (!list.empty()) { + birthfield = list[0]; + if (list.size() > 1) { + deathfield = list[1]; + } + } +*/ + DEBUG << " -> dates normalised to " << birthfield << " to " << deathfield << " approx =" << approx << endl; + } + if (birthfield != "") { + Birth *e = new Birth(birthfield.toInt()); + e->setApproximate(approx); + composer->setBirth(e); + } + if (deathfield != "") { + Death *e = new Death(deathfield.toInt()); + e->setApproximate(approx); + composer->setDeath(e); + } + if (nationalityfield != "") { + composer->setNationality(nationalityfield); + } + if (summaryfield != "") { + summaryfield.replace(QRegExp("^[Cc]omposer, *"), ""); + summaryfield[0] = summaryfield[0].toUpper(); + summaryfield.replace(QRegExp("\\[\\[[^]\\|]+\\|"), "[["); + summaryfield.replace("[[", ""); + summaryfield.replace("]]", ""); + summaryfield.replace("''", "\""); + 
summaryfield.replace(""", "'"); + summaryfield.replace(QRegExp("^\\[[^]]*\\]$"), ""); + summaryfield.replace("[", ""); + summaryfield.replace("]", ""); + composer->setRemarks(summaryfield); + } + + return composer; +} + +void +WikipediaComposersImporter::import(QUrl source) +{ + //!!! for now + QString filename = source.toLocalFile(); + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + + QString period; + DEBUG << "source = " << source.toString() << endl; + QRegExp pmatcher1("List_of_([0-9][^_-]+[_-][^_-]+)_"); + QRegExp pmatcher2("List_of_([^_-]+)[_-]era_"); + QRegExp pmatcher3("([^_-]+)_composers"); + if (pmatcher1.indexIn(source.toString()) >= 0) period = pmatcher1.cap(1); + else if (pmatcher2.indexIn(source.toString()) >= 0) period = pmatcher2.cap(1); + else if (pmatcher3.indexIn(source.toString()) >= 0) period = pmatcher3.cap(1); + DEBUG << "period = "<< period << endl; + + int count = 0; + + // table form A (used of e.g. Romantic transitional composers) + // | Name || birth || death || nationality || summary || flags + // note: 5x || + QRegExp matcher1("^\\| *\\[\\[([^]]+)\\]\\] *\\|\\|([^|]*[0-9]) *\\|\\|([^|]*[0-9]) *\\|\\|([^|]*)\\|\\|(.*)\\|\\|"); + + // table form B (used of e.g. 20th-century composers) + // | Name || birth-[death] || nationality || notable works || remarks + // Note name may contain a single | if in double-square brackets, hence 2a + // note: 4x || + QRegExp matcher2("^\\| *\\[\\[([^]]+)\\]\\] *\\|\\| *([0-9]+) *[^0-9] *([0-9]*) *\\|\\|([^0-9|]*)\\|\\|(.*)\\|\\|(.*)"); + // just in case the final column has been omitted completely (as happens). + // this must be matched after matcher2 + QRegExp matcher2a("^\\| *\\[\\[([^]]+)\\]\\] *\\|\\| *([0-9]+) *[^0-9] *([0-9]*) *\\|\\|([^0-9|]*)\\|\\|(.*)"); + + // list form + // * [[Name]] [alias?] (stuff about dates)[,] notes + QRegExp matcher3("^\\* *\\[\\[([^\\]]+)\\]\\],? 
*([^\\(]*) *\\(([^\\)]+)\\)(,?) *(.*)"); + + while (!stream.atEnd()) { + QString line = stream.readLine(); + + Composer *o = 0; + + if (matcher1.indexIn(line) >= 0) { + + o = addComposer(matcher1.cap(1), matcher1.cap(2), matcher1.cap(3), + "", matcher1.cap(4), "", matcher1.cap(5)); + + } else if (matcher2.indexIn(line) >= 0) { + + o = addComposer(matcher2.cap(1), matcher2.cap(2), matcher2.cap(3), "", + matcher2.cap(4), matcher2.cap(5), ""); + + } else if (matcher2a.indexIn(line) >= 0) { + + o = addComposer(matcher2a.cap(1), matcher2a.cap(2), matcher2a.cap(3), "", + matcher2a.cap(4), "", ""); + + } else if (matcher3.indexIn(line) >= 0) { + + o = addComposer(matcher3.cap(1), "", "", matcher3.cap(3), + "", "", matcher3.cap(5)); + + } else if (line.startsWith("* ") || line.startsWith("| ") || + line.startsWith("*[") || line.startsWith("|[")) { + DEBUG << "Failed to match promising line: " << line << endl; + } + + if (o) { + if (period != "") o->setPeriod(period); + m_objects.push_back(o); + ++count; + } + + } + + DEBUG << "Found " << count << " things" << endl; +} + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaComposers.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_WIKIPEDIA_COMPOSERS_H_ +#define _IMPORT_WIKIPEDIA_COMPOSERS_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class WikipediaComposersImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::WikipediaComposersImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaWorks.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,401 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportWikipediaWorks.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +WikipediaWorksImporter::setSource(QUrl source) +{ + DEBUG << "WikipediaWorksImporter::setSource: " << source << endl; + import(source); +} + +QString +sanitise(QString field, QString &linkText) +{ + int mp; + + field.replace(QString::fromUtf8("\342\200\222"), "-"); + field.replace(QString::fromUtf8("\342\200\223"), "-"); + field.replace(QString::fromUtf8("\342\200\224"), "-"); + field.replace(QString::fromUtf8("\342\200\225"), "-"); + + QRegExp link2("^([^A-Za-z]*)\\[\\[([^\\]\\|]+)\\|([^\\]]+)\\]\\]"); + if ((mp = link2.indexIn(field)) >= 0) { + if (linkText == "") linkText = link2.cap(2); + field.replace(mp, link2.matchedLength(), link2.cap(1) + link2.cap(3)); + return sanitise(field, linkText); + } + + QRegExp link1("^([^A-Za-z]*)\\[\\[([^\\]]+)\\]\\]"); + if ((mp = link1.indexIn(field)) >= 0) { + if (linkText == "") linkText = link1.cap(2); + field.replace(mp, link1.matchedLength(), link1.cap(1) + link1.cap(2)); + return sanitise(field, linkText); + } + + field = field.trimmed(); + + field.replace("[", ""); + field.replace("]", ""); + field.replace(QRegExp("\\{+[^\\}]*\\}+ *"), ""); + field.replace("''", "\""); + field.replace(""", "\""); + field.replace(QRegExp("<[^&]*>"), ""); + field.replace(QRegExp("^\\**"), ""); + + while (field.endsWith(".") || field.endsWith(",")) { + field = field.left(field.length()-1); + } + + if (field.startsWith("(") && field.endsWith(")")) { + DEBUG << "before: " << field; + field = field.mid(1, field.length()-2); + DEBUG << "after: " << field; + } + 
field.replace(QRegExp("^\\**"), ""); + if (field == ")" || field == "(") { + field = ""; + } + + field.replace(" - ,", ","); + + return field; +} + +QString +extractYear(QString datefield) +{ + QRegExp re("[0-9]{4}"); + if (re.indexIn(datefield) >= 0) { + return re.cap(0); + } + return ""; +} + +QString +extractKey(QString titlefield) +{ + QRegExp re("in ([A-H]([ -][a-z]+)? (major|minor))"); + if (re.indexIn(titlefield) >= 0) { + return re.cap(1); + } + return ""; +} + +Work * +makeWork(QString composerName, QString opfield, QString kfield, + QString numfield, QString titlefield, QString datefield, + QString placefield, QString remarksfield, Work *main) +{ + QString linkText; + + Work *w = new Work; + + QString op = sanitise(opfield, linkText); + if (op != "") { + op.replace("Opus ", ""); + op.replace("Op. ", ""); + op.replace("Op ", ""); + w->setOpus(op); + } + + QString k = sanitise(kfield, linkText); + if (k != "") { + w->setCatalogue(k); + } + + QString num = sanitise(numfield, linkText); + if (num != "") { + num.replace("No. 
", ""); + num.replace("No ", ""); + w->setNumber(num); + } + + QString key = extractKey(titlefield); + if (key != "") { + w->setKey(key); + } + + QString title = sanitise(titlefield, linkText); + if (linkText != "") { + linkText.replace(" ", "_"); + QUrl url; + url.setScheme("http"); + url.setHost("en.wikipedia.org"); + url.setPath("/wiki/" + QUrl::toPercentEncoding(linkText)); + Document *d = new Document; + d->setUri(url); + d->setSiteName("Wikipedia"); + d->setTopic(w); + w->addPage(d); + } + + QRegExp explicationRE("^(\"[^-]+\") - (.*)$"); + int pos; + if ((pos = explicationRE.indexIn(title)) >= 0) { + w->addAlias(explicationRE.cap(2)); + title = explicationRE.cap(1); + } + + if (remarksfield == "") { + QRegExp remarksRE("^(\"[^-]+\") (for .*)$"); + if ((pos = remarksRE.indexIn(title)) >= 0) { + remarksfield = remarksRE.cap(2); + title = remarksRE.cap(1); + } + } + + if (remarksfield == "") { + QRegExp remarksRE("^(\"[^-]+\"), (.*)$"); + if ((pos = remarksRE.indexIn(title)) >= 0) { + remarksfield = remarksRE.cap(2); + title = remarksRE.cap(1); + } + } + + w->setName(title); + + QString remarks = sanitise(remarksfield, linkText); + if (remarks != "") { + w->setRemarks(remarks); + } + + QString year = extractYear(datefield); + QString place = sanitise(placefield, linkText); + + DEBUG << "title = " << title << endl; + + if (main) { + main->addPart(w); + w->setPartOf(main); + w->setComposition(main->composition()); + main->composition()->addWork(w); + } + + if (!main || !main->composition() || + (year != "" && (main->composition()->year() != year.toInt()))) { + Composition *c = new Composition; + c->setComposerName(composerName); + c->addWork(w); + c->setYear(year.toInt()); + c->setPlace(place); + w->setComposition(c); + } + + return w; +} + + +void +WikipediaWorksImporter::import(QUrl source) +{ + //!!! 
for now + QString filename = source.toLocalFile(); + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + + QString composerName; + if (filename.contains("K%C3%B6chel")) { + composerName = "Wolfgang Amadeus Mozart"; + } else if (filename.contains("/Schubert_")) { + composerName = "Franz Schubert"; + } else { + QRegExp byby("by_(.*)_by"); + if (byby.indexIn(filename) >= 0) { + composerName = byby.cap(1).replace('_', ' '); + } else { + QRegExp by("by_(.*)"); + if (by.indexIn(filename) >= 0) { + composerName = by.cap(1).replace('_', ' '); + } + } + } + composerName = QUrl::fromPercentEncoding(composerName.toLocal8Bit()); + + DEBUG << "composerName = " << composerName << endl; + + // K numbers in tabular form (as found in "Köchel Catalogue" WP page) + QRegExp matcherK("\\|- *\n\\|[^\n]*\n\\|\\{\\{[^\\[]*\\[\\[K\\. *([0-9][0-9a-z]*)[^\n]*\n\\|([^\n]*)\n\\|([^\n]*)\n\\|([^\n]*)\n"); + + QString all = stream.readAll(); + + DEBUG << "Read " << all.length() << " chars" << endl; + + all.replace(QRegExp("^.*<page>"), ""); + + int pos = 0, count = 0; + + while ((pos = matcherK.indexIn(all, pos)) != -1) { + + all.replace(pos, matcherK.matchedLength(), ""); + ++count; + + QString kfield = matcherK.cap(1); + QString titlefield = matcherK.cap(2); + QString datefield = matcherK.cap(3); + QString placefield = matcherK.cap(4); + + m_objects.push_back + (makeWork(composerName, "K. " + kfield, kfield, "", + titlefield, datefield, placefield, "", 0)); + } + + // Opus in list form (as used for e.g. Beethoven's works) + QRegExp matcherB("[\\*:] *'*((Opus|Op\\.|WoO|Anh|H|D) [0-9][^,:'{\n]*)'*[,:{] *([^\n]*)\n"); + + // Part of an opus (e.g. 
op 18 no 1), intended to be anchored to + // the point at which the last matcherB or matcherB2 match ended + // (note caret) + QRegExp matcherB2("^[\\*:]{2} *([A-Za-z ]*)((No\\.* +)?[0-9][^ :\n]*)[: ] *([^\n]*)\n"); + + // Date and remarks within titlefield + QRegExp matcherDate("\\([^\\)]*([0-9]{4})[^0-9\\)]*\\)(.*)"); + + pos = 0; + + while ((pos = matcherB.indexIn(all, pos)) != -1) { + + all.replace(pos, matcherB.matchedLength(), ""); + ++count; + + QString opfield = matcherB.cap(1); + QString titlefield = matcherB.cap(3); + + QString datefield, remarksfield; + + if (titlefield != "") { + int dpos; + if ((dpos = matcherDate.indexIn(titlefield)) != -1) { + datefield = matcherDate.cap(1); + remarksfield = matcherDate.cap(2); + titlefield = titlefield.left(dpos); + } + } + + Work *main = makeWork(composerName, opfield, "", "", + titlefield, datefield, "", remarksfield, 0); + + m_objects.push_back(main); + + int spos = pos; + + while ((spos = matcherB2.indexIn(all, spos, QRegExp::CaretAtOffset)) + != -1) { + + all.replace(spos, matcherB2.matchedLength(), ""); + ++count; + + QString numfield = matcherB2.cap(2); + + titlefield = matcherB2.cap(4); + + if (matcherB2.cap(1).trimmed() != "") { + titlefield = matcherB2.cap(1) + matcherB2.cap(2) + " " + + matcherB2.cap(4); + DEBUG << "prefix to number = " << matcherB2.cap(1) << ", so extending title from " << matcherB2.cap(4) << " to " << titlefield << endl; + } + + datefield = ""; + remarksfield = ""; + + if (titlefield != "") { + int dpos; + if ((dpos = matcherDate.indexIn(titlefield)) != -1) { + datefield = matcherDate.cap(1); + remarksfield = matcherDate.cap(2); + titlefield = titlefield.left(dpos); + } + } + + Work *sub = makeWork(composerName, opfield, "", numfield, + titlefield, datefield, "", remarksfield, main); + + m_objects.push_back(sub); + } + } + + // Title with date but no opus in list form (as used for e.g. 
Copland) + QRegExp matcherC("\\* *([^\n]*)\\([^\\)]*([0-9]{4})[^\\)]*\\) *\n"); + + // Part of the above (e.g. song in cycle), intended to be anchored to + // the point at which the last matcherC or matcherC2 match ended + // (note caret) + QRegExp matcherC2("^\\*\\* *([^\n]*)\n"); + + pos = 0; + + while ((pos = matcherC.indexIn(all, pos)) != -1) { + + all.replace(pos, matcherC.matchedLength(), ""); + ++count; + + QString titlefield = matcherC.cap(1); + QString datefield = matcherC.cap(2); + + Work *main = makeWork(composerName, "", "", "", + titlefield, datefield, "", "", 0); + + m_objects.push_back(main); + + int spos = pos; + + while ((spos = matcherC2.indexIn(all, spos, QRegExp::CaretAtOffset)) + != -1) { + + all.replace(spos, matcherC2.matchedLength(), ""); + ++count; + + titlefield = matcherC2.cap(1); + + datefield = ""; + + if (titlefield != "") { + int dpos; + if ((dpos = matcherDate.indexIn(titlefield)) != -1) { + datefield = matcherDate.cap(1); + titlefield = titlefield.left(dpos); + } + } + + Work *sub = makeWork(composerName, "", "", "", + titlefield, datefield, "", "", main); + + m_objects.push_back(sub); + } + } + + + + DEBUG << "Left over: " << all << endl; + + // Other forms: + // *March No. 1 in F major for Military Band, WoO 18 (1808) + + + DEBUG << "Found " << count << " things" << endl; +} + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaWorks.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_WIKIPEDIA_WORKS_H_ +#define _IMPORT_WIKIPEDIA_WORKS_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class WikipediaWorksImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::WikipediaWorksImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaWorksK.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,270 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportWikipediaWorksK.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +WikipediaWorksKImporter::setSource(QUrl source) +{ + DEBUG << "WikipediaWorksKImporter::setSource: " << source << endl; + import(source); +} + +static QString +sanitise(QString field, QString &linkText) +{ + int mp; + + field.replace(QString::fromUtf8("\342\200\222"), "-"); + field.replace(QString::fromUtf8("\342\200\223"), "-"); + field.replace(QString::fromUtf8("\342\200\224"), "-"); + field.replace(QString::fromUtf8("\342\200\225"), "-"); + + QRegExp link2("^([^A-Za-z]*)\\[\\[([^\\]\\|]+)\\|([^\\]]+)\\]\\]"); + if ((mp = link2.indexIn(field)) >= 0) { + if (linkText == "") linkText = link2.cap(2); + field.replace(mp, link2.matchedLength(), link2.cap(1) + link2.cap(3)); + return sanitise(field, linkText); + } + + QRegExp link1("^([^A-Za-z]*)\\[\\[([^\\]]+)\\]\\]"); + if ((mp = link1.indexIn(field)) >= 0) { + if (linkText == "") linkText = link1.cap(2); + field.replace(mp, link1.matchedLength(), link1.cap(1) + link1.cap(2)); + return sanitise(field, linkText); + } + + field = field.trimmed(); + + field.replace("[", ""); + field.replace("]", ""); + field.replace(QRegExp("\\{+[^\\}]*\\}+ *"), ""); + field.replace("''", "\""); + field.replace(""", "\""); + field.replace(QRegExp("<[^&]*>"), ""); + field.replace(QRegExp("^\\**"), ""); + + while (field.endsWith(".") || field.endsWith(",")) { + field = field.left(field.length()-1); + } + + if (field.startsWith("(") && field.endsWith(")")) { + DEBUG << "before: " << field; + field = field.mid(1, field.length()-2); + DEBUG << "after: " << field; + } + 
field.replace(QRegExp("^\\**"), ""); + if (field == ")" || field == "(") { + field = ""; + } + + field.replace(" - ,", ","); + + return field; +} + +static QString +extractYear(QString datefield) +{ + QRegExp re("[0-9]{4}"); + if (re.indexIn(datefield) >= 0) { + return re.cap(0); + } + return ""; +} + +static QString +extractKey(QString titlefield) +{ + QRegExp re("in ([A-H]([ -][a-z]+)? (major|minor))"); + if (re.indexIn(titlefield) >= 0) { + return re.cap(1); + } + return ""; +} + +static Work * +makeWork(QString composerName, QString opfield, QString kfield, + QString numfield, QString titlefield, QString datefield, + QString placefield, QString remarksfield, Work *main) +{ + QString linkText; + + Work *w = new Work; + + QString op = sanitise(opfield, linkText); + if (op != "") { + op.replace("Opus ", ""); + op.replace("Op. ", ""); + op.replace("Op ", ""); + w->setOpus(op); + } + + QString k = sanitise(kfield, linkText); + if (k != "") { + k.replace("K. ", "K "); + w->setCatalogue(k); + } + + QString num = sanitise(numfield, linkText); + if (num != "") { + num.replace("No. 
", ""); + num.replace("No ", ""); + w->setNumber(num); + } + + QString key = extractKey(titlefield); + if (key != "") { + w->setKey(key); + } + + QString title = sanitise(titlefield, linkText); + if (linkText != "") { + linkText.replace(" ", "_"); + QUrl url; + url.setScheme("http"); + url.setHost("en.wikipedia.org"); + url.setPath("/wiki/" + QUrl::toPercentEncoding(linkText)); + Document *d = new Document; + d->setUri(url); + d->setSiteName("Wikipedia"); + d->setTopic(w); + w->addPage(d); + } + + QRegExp explicationRE("^(\"[^-]+\") - (.*)$"); + int pos; + if ((pos = explicationRE.indexIn(title)) >= 0) { + w->addAlias(explicationRE.cap(2)); + title = explicationRE.cap(1); + } + + if (remarksfield == "") { + QRegExp remarksRE("^(\"[^-]+\") (for .*)$"); + if ((pos = remarksRE.indexIn(title)) >= 0) { + remarksfield = remarksRE.cap(2); + title = remarksRE.cap(1); + } + } + + if (remarksfield == "") { + QRegExp remarksRE("^(\"[^-]+\"), (.*)$"); + if ((pos = remarksRE.indexIn(title)) >= 0) { + remarksfield = remarksRE.cap(2); + title = remarksRE.cap(1); + } + } + + w->setName(title); + + QString remarks = sanitise(remarksfield, linkText); + if (remarks != "") { + w->setRemarks(remarks); + } + + QString year = extractYear(datefield); + QString place = sanitise(placefield, linkText); + + DEBUG << "title = " << title << endl; + + if (main) { + main->addPart(w); + w->setPartOf(main); + w->setComposition(main->composition()); + main->composition()->addWork(w); + } + + if (!main || !main->composition() || + (year != "" && (main->composition()->year() != year.toInt()))) { + Composition *c = new Composition; + c->setComposerName(composerName); + c->addWork(w); + c->setYear(year.toInt()); + c->setPlace(place); + w->setComposition(c); + } + + return w; +} + + +void +WikipediaWorksKImporter::import(QUrl source) +{ + //!!! 
for now + QString filename = source.toLocalFile(); + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + + QString composerName; + if (filename.contains("K%C3%B6chel")) { + composerName = "Wolfgang Amadeus Mozart"; + } else { + QRegExp byby("by_(.*)_by"); + if (byby.indexIn(filename) >= 0) { + composerName = byby.cap(1).replace('_', ' '); + } else { + QRegExp by("by_(.*)"); + if (by.indexIn(filename) >= 0) { + composerName = by.cap(1).replace('_', ' '); + } + } + } + composerName = QUrl::fromPercentEncoding(composerName.toLocal8Bit()); + + DEBUG << "composerName = " << composerName << endl; + + // K numbers in tabular form (as found in "Köchel Catalogue" WP page) + QRegExp matcherK("\\|- *\n\\|[^\n]*\n\\|\\{\\{[^\\[]*\\[\\[(K\\.? *[0-9][0-9a-z]*)[^\n]*\n\\|([^\n]*)\n\\|([^\n]*)\n\\|([^\n]*)\n"); + + QString all = stream.readAll(); + + DEBUG << "Read " << all.length() << " chars" << endl; + + all.replace(QRegExp("^.*<page>"), ""); + + int pos = 0, count = 0; + + while ((pos = matcherK.indexIn(all, pos)) != -1) { + + all.replace(pos, matcherK.matchedLength(), ""); + ++count; + + QString kfield = matcherK.cap(1); + QString titlefield = matcherK.cap(2); + QString datefield = matcherK.cap(3); + QString placefield = matcherK.cap(4); + + m_objects.push_back + (makeWork(composerName, "", kfield, "", + titlefield, datefield, placefield, "", 0)); + } + + DEBUG << "Left over: " << all << endl; + + DEBUG << "Found " << count << " things" << endl; +} + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaWorksK.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_WIKIPEDIA_WORKS_K_H_ +#define _IMPORT_WIKIPEDIA_WORKS_K_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class WikipediaWorksKImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::WikipediaWorksKImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaWorksList.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,668 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "ImportWikipediaWorksList.h" + +#include <dataquay/Debug.h> + +#include <QFile> +#include <QFileInfo> +#include <QTextStream> +#include <QRegExp> +#include <QVariant> + +#include <exception> + +using namespace Dataquay; + +namespace ClassicalData { + +void +WikipediaWorksListImporter::setSource(QUrl source) +{ + DEBUG << "WikipediaWorksListImporter::setSource: " << source << endl; + import(source); +} + +static QString +sanitise(QString field, QString &linkText) +{ + int mp; + + field.replace(QString::fromUtf8("\342\200\222"), "-"); + field.replace(QString::fromUtf8("\342\200\223"), "-"); + field.replace(QString::fromUtf8("\342\200\224"), "-"); + field.replace(QString::fromUtf8("\342\200\225"), "-"); + + field.replace(QString::fromUtf8("\342\231\255"), "-flat"); + field.replace(QString::fromUtf8("\342\231\257"), "-sharp"); + + QRegExp link2("([^A-Za-z]*)\\[\\[([^\\]\\|]+)\\|([^\\]]+)\\]\\]"); + if ((mp = link2.indexIn(field)) >= 0) { + if (linkText == "" && mp < 4) linkText = link2.cap(2); + field.replace(mp, link2.matchedLength(), link2.cap(1) + link2.cap(3)); + return sanitise(field, linkText); + } + + QRegExp link1("^([^A-Za-z]*)\\[\\[([^\\]]+)\\]\\]"); + if ((mp = link1.indexIn(field)) >= 0) { + if (linkText == "") linkText = link1.cap(2); + field.replace(mp, link1.matchedLength(), link1.cap(1) + link1.cap(2)); + return sanitise(field, linkText); + } + + field = field.trimmed(); + + field.replace("[", ""); + field.replace("]", ""); + field.replace(QRegExp("\\{+[^\\}]*\\}+ *"), " "); + field.replace("'''", "\""); + field.replace("''", "\""); + field.replace(""", "\""); + field.replace("\"\"", "\""); + field.replace(QRegExp("^[\'\"] (\")?"), "\""); + field.replace(QRegExp("<[^&]*>"), ""); + field.replace(QRegExp("^\\**"), ""); + + if 
(field.endsWith("c.")) { + // historical artifact from removal of Bruckner year indication (c. 1856) + field = field.left(field.length()-2); + } + + while (field.endsWith(".") || field.endsWith(",")) { + field = field.left(field.length()-1); + } + + if (field.startsWith(";") || field.startsWith(":") || field.startsWith(",") + || field.startsWith("-")) { + field = field.right(field.length()-1); + } + + if (field.startsWith("(") && field.endsWith(")")) { + DEBUG << "before: " << field; + field = field.mid(1, field.length()-2); + DEBUG << "after: " << field; + } + + field.replace(QRegExp("^\\**"), ""); + if (field == ")" || field == "(") { + field = ""; + } + + field.replace(" - ,", ","); + field.replace(" ", " "); + + return field.trimmed(); +} + +static QString +extractYear(QString datefield) +{ + QRegExp re("[0-9]{4}"); + if (re.indexIn(datefield) >= 0) { + return re.cap(0); + } + return ""; +} + +static QString +extractKey(QString titlefield) +{ + QRegExp re("in ([A-H]([ -][a-z]+)? (major|minor))"); + if (re.indexIn(titlefield) >= 0) { + return re.cap(1); + } + return ""; +} + +static Work * +makeWork(QString composerName, QString opfield, QString numfield, + int partNumber, QString titlefield, QString datefield, + QString placefield, QString remarksfield, Work *main) +{ + if (titlefield.contains("List of ") || titlefield.contains("http:")) return 0; + + QString linkText; + + Work *w = new Work; + + QRegExp embeddedOpMatcher("([Oo]pus|[Oo]p.|WAB) (posth[a-z\\.]* *)?([0-9][^ ;:,]*)(,? *([Nn]umber|[Nn]o.|[Nn]r.) ([0-9][^ ;:,]*))?,?"); + if (embeddedOpMatcher.indexIn(titlefield) >= 0) { + QString opf = embeddedOpMatcher.cap(0); + if (opfield == "") opfield = opf; + titlefield.replace(opf, ""); + } else if (embeddedOpMatcher.indexIn(remarksfield) >= 0) { + opfield = embeddedOpMatcher.cap(0); + } + if (main && numfield == "") { + QRegExp embeddedNumMatcher("(Number|No.|Nr.) 
([0-9][^ ;:,]*)"); + if (embeddedNumMatcher.indexIn(titlefield) >= 0) { + numfield = embeddedNumMatcher.cap(2); + } else if (embeddedNumMatcher.indexIn(remarksfield) >= 0) { + numfield = embeddedNumMatcher.cap(2); + } + } + + QString op = sanitise(opfield, linkText); + if (op != "") { + if (op.toLower().contains("op")) { + op.replace("Opus ", ""); + op.replace("Op. ", ""); + op.replace("Op.", ""); + op.replace("Op ", ""); + op.replace("opus ", ""); + op.replace("op. ", ""); + op.replace("op.", ""); + op.replace("op ", ""); + w->setOpus(op); + } else if (QRegExp("^[0-9]*$").indexIn(op) >= 0) { + w->setOpus(op); + } else { + w->setCatalogue(op); + } + } + + QString num = sanitise(numfield, linkText); + if (num != "") { + num.replace("No. ", ""); + num.replace("No ", ""); + w->setNumber(num); + } else if (partNumber > 0) { + w->setNumber(QString("%1").arg(partNumber)); + } + + QString key = extractKey(titlefield); + if (key != "") { + w->setKey(key); + } + + DEBUG << "title before sanitise: " << titlefield << endl; + + remarksfield = remarksfield.trimmed(); + + QString title = sanitise(titlefield, linkText); + title.replace(QRegExp(", which.*$"), ""); + if (linkText != "") { + if (remarksfield == "" && title.startsWith(linkText)) { + remarksfield = title.right(title.length() - linkText.length()); + title = linkText; + } + linkText.replace(" ", "_"); + QUrl url; + url.setScheme("http"); + url.setHost("en.wikipedia.org"); + url.setPath("/wiki/" + QUrl::toPercentEncoding(linkText)); + Document *d = new Document; + d->setUri(url); + d->setSiteName("Wikipedia"); + d->setTopic(w); + w->addPage(d); + } + + DEBUG << "title after sanitise: " << title << ", link text " << linkText << ", remarks " << remarksfield << endl; + + QRegExp explicationRE("^(\"[^-]+\") - (.+)$"); + int pos; + if ((pos = explicationRE.indexIn(title)) >= 0) { + QString part = explicationRE.cap(2); + if (part[0].isUpper()) w->addAlias(explicationRE.cap(2)); + else if (remarksfield == "") remarksfield = 
explicationRE.cap(2); + title = explicationRE.cap(1); + } + + QRegExp remarksRE1("^(\"[^-]+\") (for .*)$"); + if ((pos = remarksRE1.indexIn(title)) >= 0) { + if (remarksfield != "") { + remarksfield = QString("%1 - %2") + .arg(remarksRE1.cap(2)).arg(remarksfield); + } else { + remarksfield = remarksRE1.cap(2); + } + title = remarksRE1.cap(1); + } + + QRegExp remarksRE2("^(\"[^\"]+\"), (.*)$"); + if ((pos = remarksRE2.indexIn(title)) >= 0) { + if (remarksfield != "") { + remarksfield = QString("%1 - %2") + .arg(remarksRE2.cap(2)).arg(remarksfield); + } else { + remarksfield = remarksRE2.cap(2); + } + title = remarksRE2.cap(1); + } + + QRegExp explicationRE2("^([^\\(]*\") \\(([^\\)]*)\\)(.*)$"); + if ((pos = explicationRE2.indexIn(title)) >= 0) { + w->addAlias(explicationRE2.cap(2)); + if (remarksfield == "") remarksfield = explicationRE2.cap(3); + title = explicationRE2.cap(1); + } + + if (title.startsWith("Song \"")) { + title = title.right(title.length() - 5); + w->addForm(Form::getFormByName("song")); + } + if (!main && title.startsWith("Song cycle \"")) { + title = title.right(title.length() - 11); + w->addForm(Form::getFormByName("song cycle")); + } + if (main && main->forms().contains(Form::getFormByName("song cycle"))) { + w->addForm(Form::getFormByName("song")); + } + + if (title == "" && !main) { + delete w; + return 0; + } + + w->setName(title); + + QString remarks = sanitise(remarksfield, linkText); + if (remarks != "") { + w->setRemarks(remarks); + } + + QString year = extractYear(datefield); + QString place = sanitise(placefield, linkText); + + DEBUG << "title = " << title << endl; + + if (main) { + main->addPart(w); + w->setPartOf(main); + w->setComposition(main->composition()); + main->composition()->addWork(w); + } + + if (!main || !main->composition() || + (year != "" && (main->composition()->year() != year.toInt()))) { + Composition *c = new Composition; + c->setComposerName(composerName); + c->addWork(w); + c->setYear(year.toInt()); + 
c->setPlace(place); + w->setComposition(c); + } + + return w; +} + + +void +WikipediaWorksListImporter::import(QUrl source) +{ + //!!! for now + QString filename = source.toLocalFile(); + + QFile file(filename); + if (!file.open(QFile::ReadOnly | QFile::Text)) { + throw std::exception(); + } + + QTextStream stream(&file); + stream.setCodec("UTF-8"); + + QString composerName; + if (filename.contains("K%C3%B6chel")) { + composerName = "Wolfgang Amadeus Mozart"; + } else if (filename.contains("/Schubert_")) { + composerName = "Franz Schubert"; + } else { + QRegExp byby("by_(.*)_by"); + if (byby.indexIn(filename) >= 0) { + composerName = byby.cap(1).replace('_', ' '); + } else { + QRegExp bybr("by_(.*)_\\("); + if (bybr.indexIn(filename) >= 0) { + composerName = bybr.cap(1).replace('_', ' '); + } else { + QRegExp by("by_(.*)"); + if (by.indexIn(filename) >= 0) { + composerName = by.cap(1).replace('_', ' '); + } else { + QRegExp of("of_([A-Z].*)"); + if (of.indexIn(filename) >= 0) { + composerName = of.cap(1).replace('_', ' '); + } + } + } + } + } + composerName = QUrl::fromPercentEncoding(composerName.toLocal8Bit()); + + DEBUG << "composerName = " << composerName << endl; + + + // We try to keep these matchers specific enough that we can be + // sure the title field will come out containing _at least_ the + // title. i.e. the title field should never end up with just the + // opus number or date or whatever, even if the line is formatted + // in a way we hadn't anticipated. Thus it helps if the title is + // bookended by '' or [[]], etc + + // e.g. Beethoven + // *Opus 84: ''[[Egmont (Beethoven)|Egmont]]'', overture and incidental music (1810) + // opus field - n/a - title - date - n/a - remarks + QRegExp workMatcher1("^\\* *(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:{]*)[:,] *(.*) *\\([^\\)]*([0-9]{4}(-[0-9]+)*)[^0-9\\)]*\\) *(.*)$"); + + // e.g. Tchaikovsky + // *'''Op. 
19''' 6 Pieces, for piano (1873) + // or Ravel + // * '''1''', Piano Sonata movement (1888), lost + +/* + // opus field - n/a - title - date - n/a - remarks + QRegExp workMatcher1a("^\\* *'''(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G)? *[0-9][^ ,:'{]*)'''[:, ] *(.*) *\\([^\\)]*([0-9]{4}(-[0-9]+)*)[^0-9\\)]*\\) *(.*)$"); +*/ + // opus field - n/a - title + QRegExp workMatcher1a("^\\* *'''(([Oo]pus|[Oo]p\\.|WoO|Anh|[A-Z]{1,2})?\\.? *[0-9][^ ,:'{]*),?'''[:, ] *(.*)$"); + + // e.g. Copland + // * ''Four Motets'' for mixed voices (1921) + // title - date field + // (no opus) + QRegExp workMatcher2("^\\* *(''.*''\\)?) *(.*)$"); + workMatcher2.setMinimal(true); // avoid matching multiple ''...'' substrings + + // e.g. Copland + // * Arrangement of ''Lincoln Portrait'' for concert band (1942) + // or Mendelssohn + // * [[Christe du Lamm Gottes]] (1827), SATB, strings + // title - date field - remarks + // (no opus) + QRegExp workMatcher3("^\\* *([^\\*].*) *\\(([^\\)]*[0-9]{4}[^\\)]*)\\) *(.*)$"); + + // e.g. Scriabin + // *[[Sonata No. 2 (Scriabin)|Sonata No. 2 in G sharp minor]], Op. 19 (also known as ''Sonata-Fantasy'')" + // title - opus field - n/a - remarks + QRegExp workMatcher4("^\\* *(\\[\\[.*\\]\\]),* (([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*) *(.*)$"); + + // e.g. Scriabin + // *Opus 35: [[Opus 35 (Scriabin)|Three Preludes]] + // opus field - n/a - title - remarks + QRegExp workMatcher5("^\\* *(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*)[:,]* *([\\[']+.*[\\]']+) *(.*)$"); + + // e.g. Boccherini + // *G 1: Cello Sonata in F major + // or weird Schubert layout + // * D 505{{nbsp|4}}Adagio in D-flat for Piano + // or Glazunov + // :Op. 67: ''[[The Seasons (ballet)|The Seasons]]'', ballet in one act (1900) + // or even + // ::Op. 77: ''[[Symphony No. 7 (Glazunov)|Symphony No. 7]]'' "Pastorale" in F major (1902-1903) + // This one is a real mess, for really messy pages. 
Needs to go near + // the end of the matchers in case it catches something it shouldn't + // n/a - opus field - n/a - n/a - n/a - title + QRegExp workMatcher6("^([\\*:]|::) *(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*)(([:,]| *\\{+[^\\}]+\\}+) *(.*))?$"); + + // e.g. Bruch + // * Adagio appassionato for violin and orchestra in C sharp minor, Op. 57 + // title - opus field - date field + QRegExp workMatcher7("^\\* *(.*),? (([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*|[Oo]p. posth[a-z.]*) *(\\([^\\)]*([0-9]{4}(-[0-9]+)*)[^0-9\\)]*\\))? *$"); + + // e.g. Bruckner + // * Symphony No. 0 in D minor 1869 WAB 100 + // title - date field - opus field + QRegExp workMatcher8("^\\* *(.*) ([0-9]{4}[0-9/-]*) *(WAB [0-9][^ ]*)$"); + + // e.g. Bach + // * BWV 506 ? Was bist du doch, o Seele, so betruebet + // opus field - title + QRegExp workMatcher9("^\\* *(BWV [^ ]+)(.*)$"); + + // Catch-all for things that look at all promising (anything that + // starts with ' or [ after bullet: take the whole as title) + QRegExp workMatcher10("^[\\*:] *((['\\[]|").*)$"); + + + + // e.g. Beethoven + // **No. 1: [[Piano Trio No. 1 (Beethoven)|Piano Trio No. 1]] in E-flat major + // number field - n/a - title, remarks etc + QRegExp partMatcher1("^[\\*:]{2} *((No\\.? *)?[0-9][^ ,:'{]*)[:, ] *(.*)$"); + + // e.g. Copland + // ** ''Help us, O Lord'' + // title - remarks + QRegExp partMatcher2("^\\*\\* *(''.*'') *(.*)$"); + partMatcher2.setMinimal(true); // avoid matching multiple ''...'' substrings + + // e.g. Scriabin + // **[[Mazurka Op. 40 No. 1 (Scriabin)|Mazurka in D flat major]] + // title - remarks + QRegExp partMatcher3("^\\*\\* *(\\[\\[.*\\]\\])(.*)$"); + + // e.g. Berlioz + // ** 1: ''Méditation religieuse'' + // number - title - remarks + QRegExp partMatcher4("^\\*\\* *([0-9][0-9a-z]*)[\\.: ] *([\\[]*''.*''[\\]]*) *(.*)$"); + + // e.g. Tchaikovsky + // **4. Nocturne [???????] (C? 
minor) + // number - title - remarks + QRegExp partMatcher5("^\\*\\* *([0-9][0-9a-z]*)[\\.: ] *(.*\\[[^\\]]+\\])(.*)$"); + + // e.g. Schubert + // **2. "Wohin?" + // n/a - number - title + QRegExp partMatcher6("^\\*\\* *(([0-9][0-9a-z]*)[\\.:])? *(("|'').*)$"); + + // e.g. Mendelssohn + // ** Notturno + // title only + QRegExp partMatcher7("^\\*\\* *(.*)$"); + + + // Date and remarks within titlefield or remarksfield + QRegExp matcherDate("\\([^\\)]*([0-9]{4})[^0-9\\)]*\\),?(.*)"); + + + Work *main = 0; + int partNumber = 0; + + QString line; + QString opfield, numfield, titlefield, remarksfield, datefield; + + while (!stream.atEnd()) { + + if (line == "") { + line = stream.readLine(); + DEBUG << "line: " << line << endl; + } + + opfield = ""; + numfield = ""; + titlefield = ""; + datefield = ""; + remarksfield = ""; + partNumber = 0; + + if (workMatcher1.indexIn(line) >= 0) { + + DEBUG << "matcher 1" << endl; + opfield = workMatcher1.cap(1); + titlefield = workMatcher1.cap(3); + datefield = workMatcher1.cap(4); + remarksfield = workMatcher1.cap(6); + + } else if (workMatcher1a.indexIn(line) >= 0) { + + DEBUG << "matcher 1a" << endl; + opfield = workMatcher1a.cap(1); + titlefield = workMatcher1a.cap(3); +/* + datefield = workMatcher1a.cap(4); + remarksfield = workMatcher1a.cap(6); +*/ + + } else if (workMatcher2.indexIn(line) >= 0) { + + DEBUG << "matcher 2" << endl; + titlefield = workMatcher2.cap(1); + remarksfield = workMatcher2.cap(2); + + } else if (workMatcher3.indexIn(line) >= 0) { + + DEBUG << "matcher 3" << endl; + titlefield = workMatcher3.cap(1); + datefield = workMatcher3.cap(2); + remarksfield = workMatcher3.cap(3); + + } else if (workMatcher4.indexIn(line) >= 0) { + + DEBUG << "matcher 4" << endl; + titlefield = workMatcher4.cap(1); + opfield = workMatcher4.cap(2); + remarksfield = workMatcher4.cap(4); + + } else if (workMatcher5.indexIn(line) >= 0) { + + DEBUG << "matcher 5" << endl; + opfield = workMatcher5.cap(1); + titlefield = 
workMatcher5.cap(3); + remarksfield = workMatcher5.cap(4); + + } else if (workMatcher6.indexIn(line) >= 0) { + + DEBUG << "matcher 6" << endl; + opfield = workMatcher6.cap(2); + titlefield = workMatcher6.cap(6); + + } else if (workMatcher7.indexIn(line) >= 0) { + + DEBUG << "matcher 7" << endl; + titlefield = workMatcher7.cap(1); + opfield = workMatcher7.cap(2); + datefield = workMatcher7.cap(3); + + } else if (workMatcher8.indexIn(line) >= 0) { + + DEBUG << "matcher 8" << endl; + titlefield = workMatcher8.cap(1); + datefield = workMatcher8.cap(2); + opfield = workMatcher8.cap(3); + + } else if (workMatcher9.indexIn(line) >= 0) { + + DEBUG << "matcher 9" << endl; + opfield = workMatcher9.cap(1); + titlefield = workMatcher9.cap(2); + + } else if (workMatcher10.indexIn(line) >= 0) { + + DEBUG << "matcher 10" << endl; + titlefield = workMatcher10.cap(1); + + } else { + if (line.startsWith("*") || line.startsWith(":")) { + DEBUG << "Failed to match promising works list line: " << line << endl; + } + line = ""; + continue; + } + + if (titlefield != "" && datefield == "") { + int dpos; + if ((dpos = matcherDate.indexIn(titlefield)) != -1) { + datefield = matcherDate.cap(1); + remarksfield = matcherDate.cap(2); + titlefield = titlefield.left(dpos); + } + } + + if (remarksfield != "" && datefield == "") { + int dpos; + if ((dpos = matcherDate.indexIn(remarksfield)) != -1) { + datefield = matcherDate.cap(1); + remarksfield = remarksfield.left(dpos); + } + } + + main = makeWork(composerName, opfield, "", 0, + titlefield, datefield, "", remarksfield, 0); + + if (main) m_objects.push_back(main); + + line = ""; + + while (!stream.atEnd()) { + + ++partNumber; + line = stream.readLine(); + DEBUG << "line: " << line << endl; + + if (partMatcher1.indexIn(line) >= 0) { + + DEBUG << "part matcher 1" << endl; + numfield = partMatcher1.cap(1); + titlefield = partMatcher1.cap(3); + remarksfield = ""; + + } else if (partMatcher2.indexIn(line) >= 0) { + + DEBUG << "part matcher 2" << 
endl; + titlefield = partMatcher2.cap(1); + remarksfield = partMatcher2.cap(2); + + } else if (partMatcher3.indexIn(line) >= 0) { + + DEBUG << "part matcher 3" << endl; + titlefield = partMatcher3.cap(1); + remarksfield = partMatcher3.cap(2); + + } else if (partMatcher4.indexIn(line) >= 0) { + + DEBUG << "part matcher 4" << endl; + numfield = partMatcher4.cap(1); + titlefield = partMatcher4.cap(2); + remarksfield = partMatcher4.cap(3); + + } else if (partMatcher5.indexIn(line) >= 0) { + + DEBUG << "part matcher 5" << endl; + numfield = partMatcher5.cap(1); + titlefield = partMatcher5.cap(2); + remarksfield = partMatcher5.cap(3); + + } else if (partMatcher6.indexIn(line) >= 0) { + + DEBUG << "part matcher 6" << endl; + numfield = partMatcher6.cap(2); + titlefield = partMatcher6.cap(3); + + } else if (partMatcher7.indexIn(line) >= 0) { + + DEBUG << "part matcher 7" << endl; + titlefield = partMatcher7.cap(1); + + } else { + if (line.startsWith("**") || line.startsWith("::")) { + DEBUG << "Failed to match promising part line: " << line << endl; + } + break; + } + + if (titlefield != "" && datefield == "") { + int dpos; + if ((dpos = matcherDate.indexIn(titlefield)) != -1) { + datefield = matcherDate.cap(1); + remarksfield = matcherDate.cap(2); + titlefield = titlefield.left(dpos); + } + } + + Work *part = makeWork(composerName, opfield, numfield, partNumber, + titlefield, datefield, "", remarksfield, + main); + + if (part) m_objects.push_back(part); + } + } + + DEBUG << "Found " << m_objects.size() << " things" << endl; +} + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/ImportWikipediaWorksList.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,29 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _IMPORT_WIKIPEDIA_WORKS_LIST_H_ +#define _IMPORT_WIKIPEDIA_WORKS_LIST_H_ + +#include "Importer.h" + +namespace ClassicalData { + +class WikipediaWorksListImporter : public Importer +{ + Q_OBJECT + +public: + virtual void setSource(QUrl source); + + virtual QObjectList getImportedObjects() { return m_objects; } + +protected: + void import(QUrl source); + + QObjectList m_objects; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::WikipediaWorksListImporter*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/Importer.h Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,36 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#ifndef _CLASSICAL_DATA_IMPORTER_H_ +#define _CLASSICAL_DATA_IMPORTER_H_ + +#include "Objects.h" + +#include <QUrl> +#include <QObject> + +namespace ClassicalData { + +class Importer : public QObject +{ + Q_OBJECT + + Q_PROPERTY(QUrl source READ source WRITE setSource STORED true) + +public: + Importer(QObject *parent = 0) : QObject(parent) { } + virtual ~Importer() { } + + virtual QUrl source() const { return m_source; } + virtual void setSource(QUrl source) = 0; + + virtual QObjectList getImportedObjects() = 0; + +protected: + QUrl m_source; +}; + +} + +Q_DECLARE_METATYPE(ClassicalData::Importer*); + +#endif
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/Test.cpp Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,831 @@ +/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ + +#include "Objects.h" + +#include <dataquay/BasicStore.h> +#include <dataquay/RDFException.h> +#include <dataquay/objectmapper/ObjectMapper.h> +#include <dataquay/objectmapper/ObjectBuilder.h> +#include <dataquay/objectmapper/ContainerBuilder.h> + +#include "ImportClassicalComposersOrg.h" +#include "ImportClassicalDotNet.h" +#include "ImportWikipediaComposers.h" +#include "ImportWikipediaWorks.h" +#include "ImportWikipediaWorksK.h" +#include "ImportWikipediaWorksList.h" +#include "ImportHoboken.h" + +#include <dataquay/Debug.h> + +using namespace ClassicalData; +using namespace Dataquay; + +#include <iostream> +#include <set> + +typedef QMap<QString, QSet<Composer *> > ComposerMap; // name -> composers + +bool datesMatch(Composer *a, Composer *b) +{ + if (a->birth() && b->birth()) { + if (abs(a->birth()->year() - b->birth()->year()) > 1) { + if ((!a->birth()->approximate() && !b->birth()->approximate()) || + (abs(a->birth()->year() - b->birth()->year()) > 10)) { + return false; + } + } + } + if (a->death() && b->death()) { + if (abs(a->death()->year() - b->death()->year()) > 1) { + if ((!a->death()->approximate() && !b->death()->approximate()) || + (abs(a->death()->year() - b->death()->year()) > 10)) { + return false; + } + } + } + return true; +} + +void +addMiscExpansions(Composer *c) +{ + QString n = c->name(); + + DEBUG << "addMiscExpansions: n = " << n << endl; + + // lovely hard-coded special cases go here! 
some of these are + // needed for works->composer assignments + if (n == "Balakirev, Milii") { + c->addAlias("Mily Balakirev"); + } + if (n.startsWith("Cui, C")) { + c->addAlias(QString::fromUtf8("C\303\251sar Cui")); + } + if (n == "Handel, George Frideric") { + c->addAlias("Handel, Georg Friedrich"); + c->addAlias("Handel"); + } + if (n == "Mayr, Simon") { + c->addAlias("Mayr"); + } + + n.replace(", Sr.", " Sr."); + n.replace(", Jr.", " Jr."); + + int comma = n.indexOf(", "); + if (comma > 0 && comma + 2 < n.length()) { + + QString left = n.left(comma); + QString right = n.right(n.length() - comma - 2); + + QRegExp jrsr("( (Sr\\.|Jr\\.|I|II))$"); + if (jrsr.indexIn(right) >= 0) { + left = left + jrsr.cap(1); + right = right.left(right.length()-jrsr.matchedLength()); + } + n = right + " " + left; + } + + if (n != c->name()) c->addAlias(n); + + if (n.contains("Sergey")) { + QString nn(n); + nn.replace("Sergey", "Sergei"); + c->addAlias(nn); + } + + QRegExp sr("((, )?Sr\\.|Senior|\\(?the elder\\)?)", Qt::CaseInsensitive); + if (sr.indexIn(n) >= 0) { + QString nr = n; + nr.replace(sr.pos(0), sr.matchedLength(), " I"); + nr.replace(" ", " "); + DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; + c->addAlias(nr); + } + QRegExp jr("((, )?Jr\\.|Junior|\\(?the younger\\)?)", Qt::CaseInsensitive); + if (jr.indexIn(n) >= 0) { + QString nr = n; + nr.replace(jr.pos(0), jr.matchedLength(), " II"); + nr.replace(" ", " "); + DEBUG << "addMiscExpansions: trying " << nr << " for " << n << endl; + c->addAlias(nr); + } + QString nr = n; + nr.replace("(I)", "I"); + nr.replace("(II)", "II"); + nr.replace("(III)", "III"); + c->addAlias(nr); +} + +bool namesFuzzyMatch(QString an, Composer *b) +{ + // ew! 
+ + QString bn = b->name(); + if (bn == an) return true; + if (b->aliases().contains(an)) return true; + int aSurnameIndex = 0, bSurnameIndex = 0; + if (an.contains(",")) { + an.replace(",", ""); + } else { + aSurnameIndex = -1; + } + if (bn.contains(",")) { + bn.replace(",", ""); + } else { + bSurnameIndex = -1; + } + QStringList nl = an.split(QRegExp("[ -]")); + QStringList bnl = bn.split(QRegExp("[ -]")); + int matchCount = 0; + QString surnameMatch = ""; + if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; + if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; + if (nl[aSurnameIndex][0].isUpper() && + nl[aSurnameIndex] != "Della" && + nl[aSurnameIndex] == bnl[bSurnameIndex]) { + surnameMatch = nl[aSurnameIndex]; + } + foreach (QString elt, nl) { + if (!elt[0].isUpper() || elt == "Della") continue; + if (bnl.contains(elt)) { + ++matchCount; + continue; + } + } + if (matchCount > 1 && surnameMatch != "") { + DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; + return true; + } + return false; +} + +bool +hasBetterName(Composer *c, Composer *other) +{ + if (c->name() == other->name()) return false; + + // Try to guess which of c and other is more likely to have a good + // "canonical form" of the composer's name + + if (c->name().startsWith("van ")) { + return false; // wrong choice of sort for e.g. 
LvB; should be + // Beethoven, Ludwig van, not van Beethoven, Ludwig + } + if (other->name().startsWith("van ")) { + return true; + } + + if (c->aliases().size() != other->aliases().size()) { + // a rather weak heuristic + return c->aliases().size() > other->aliases().size(); + } + + if (c->name().contains(',') && !other->name().contains(',')) { + // another rather weak heuristic + return true; + } + + return false; +} + +void mergeComposer(Composer *c, ComposerMap &composers) +{ + QString name = c->name(); + + QSet<QString> allNames = c->aliases(); + allNames.insert(name); + + QString dates; + if (c->birth()) { + if (c->death()) { + dates = QString("%1-%2").arg(c->birth()->year()).arg(c->death()->year()); + } else { + dates = QString("%1-").arg(c->birth()->year()); + } + } + if (dates != "") { + allNames.insert(dates); + } + + QSet<Composer *> matches; + + foreach (QString candidateName, allNames) { + QString key = candidateName.toLower(); + if (composers.contains(key)) { + foreach (Composer *candidate, composers[key]) { + if (candidateName == dates) { + if (!namesFuzzyMatch(c->name(), candidate) && + !namesFuzzyMatch(candidate->name(), c)) { + DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; + continue; + } else { + DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; + } + } else { + if (!datesMatch(c, candidate)) { + DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; + continue; + } + } + matches.insert(candidate); + } + } + } + + if (matches.empty()) { + DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; + + if (!c->birth() && !c->death()) { + // laboriously look for fuzzy match across _all_ composers + for (ComposerMap::iterator i = composers.begin(); + i != 
composers.end(); ++i) { + foreach (Composer *candidate, *i) { + if (namesFuzzyMatch(c->name(), candidate)) { + DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; + matches.insert(candidate); + break; + } + } + if (!matches.empty()) break; + } + } + + if (matches.empty()) { + foreach (QString candidateName, allNames) { + composers[candidateName.toLower()].insert(c); + DEBUG << "added for alias or date " << candidateName << endl; + } + return; + } + } + + if (matches.size() > 1) { + DEBUG << "mergeComposer: More than one composer matches name and date(s) for " << c->name() << " -- something fishy here" << endl; + } + + Composer *other = *matches.begin(); + + DEBUG << "mergeComposer: Merging " << c->name() << " with " << other->name() << endl; + + if (hasBetterName(c, other)) { + other->addAlias(other->name()); + other->setName(c->name()); + } else { + other->addAlias(c->name()); + } + composers[c->name().toLower()].insert(other); + DEBUG << "linking from alias " << c->name() << endl; + + foreach (QString alias, c->aliases()) { + if (alias != other->name() && + !other->aliases().contains(alias)) { + other->addAlias(alias); + composers[alias.toLower()].insert(other); + DEBUG << "linking from alias " << alias << endl; + } + } + + foreach (Document *d, c->pages()) { + bool found = false; + foreach (Document *dd, other->pages()) { + if (d->uri() == dd->uri()) { + found = true; + break; + } + } + if (!found) { + d->setTopic(other); + other->addPage(d); + } + } + + //!!! 
actually the "approximate" bits of the following are bogus; + // a source reporting birth or death date as approx is probably + // more accurate than one reporting an exact date + + if (c->birth()) { + if (!other->birth() || other->birth()->approximate()) { + other->setBirth(c->birth()); + } + } + + if (c->death()) { + if (!other->death() || other->death()->approximate()) { + other->setDeath(c->death()); + } + } + + if (c->gender() != "") other->setGender(c->gender()); + if (c->nationality() != "") other->setNationality(c->nationality()); + if (c->remarks() != "") other->setRemarks(c->remarks()); + if (c->period() != "") other->setPeriod(c->period()); + +} + +QString +asciify(QString field) +{ + // accented characters etc -- add "ascii version" for dumb search purposes + QString ascii; + for (int i = 0; i < field.length(); ++i) { + QString dc = field[i].decomposition(); + if (dc != "") ascii += dc[0]; + else if (field[i] == QChar(0x00DF)) { + ascii += "ss"; + } else { + ascii += field[i]; + } + } + ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe + ascii.replace(QString::fromUtf8("\342\200\222"), "-"); + ascii.replace(QString::fromUtf8("\342\200\223"), "-"); + ascii.replace(QString::fromUtf8("\342\200\224"), "-"); + ascii.replace(QString::fromUtf8("\342\200\225"), "-"); + return ascii; +} + +void +asciify(Composer *c) +{ + QString n = c->name(); + QString asc = asciify(n); + if (asc != n && !c->aliases().contains(asc)) c->addAlias(asc); + foreach (QString alias, c->aliases()) { + asc = asciify(alias); + if (asc != alias && !c->aliases().contains(asc)) c->addAlias(asc); + } +} + +void +asciify(Work *w) +{ + QString n = w->name(); + QString asc = asciify(n); + if (asc != n && !w->aliases().contains(asc)) w->addAlias(asc); + foreach (QString alias, w->aliases()) { + asc = asciify(alias); + if (asc != alias && !w->aliases().contains(asc)) w->addAlias(asc); + } +} + +void +assignUri(Store *s, Composer *c) +{ + static QSet<QString> convSet; + QString 
conv = c->name(); + if (!conv.contains(",")) { + QStringList sl = conv.split(" "); + if (!sl.empty()) { + sl.push_front(sl[sl.size()-1]); + sl.removeLast(); + conv = sl.join(" "); + DEBUG << "assignUri: " << c->name() << " -> " << conv << endl; + } + } + conv = asciify(conv); + conv.replace(" ", "_"); + conv.replace("-", "_"); + conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); + conv = conv.toLower(); + QString initial = conv; + int i = 1; + while (convSet.contains(conv)) { + conv = QString("%1__%2").arg(initial).arg(i); + i++; + } + convSet.insert(conv); + c->setProperty("uri", s->expand(":composer_" + conv)); +} + +void +assignUri(Store *s, Work *w, Composer *c) +{ + QString pfx = c->property("uri").toUrl().toString(); + DEBUG << "pfx = " << pfx << endl; + if (!pfx.contains("composer_")) pfx = ""; + else pfx.replace(QRegExp("^.*composer_"), ""); + + static QSet<QString> convSet; + QString conv = w->catalogue(); + if (conv == "") conv = w->opus(); + conv = conv.replace(".", ""); + bool hasOpus = (conv != ""); + if (conv == "") conv = w->name(); + if (w->number() != "") conv = conv + "_no" + w->number(); + if (pfx != "") conv = pfx + "_" + conv; + conv = asciify(conv); + conv.replace(" ", "_"); + conv.replace("-", "_"); + conv.replace(":", "_"); + conv.replace(QRegExp("[^a-zA-Z0-9_-]"), ""); + conv = conv.toLower(); + // I think actually for works we want to merge duplicates rather than + // assign them separate URIs, _unless_ they lack a viable opus number + if (!hasOpus) { + QString initial = conv; + int i = 1; + while (convSet.contains(conv)) { + conv = QString("%1__%2").arg(initial).arg(i); + i++; + } + } + convSet.insert(conv); + w->setProperty("uri", s->expand(":work_" + conv)); +} + +void +addDbpediaResource(Store *store, QObject *o, QString s) +{ + QUrl u = o->property("uri").toUrl(); + if (u == QUrl()) return; + if (s.startsWith("http://en.wikipedia.org/wiki/")) { + store->add(Triple(u, + "mo:wikipedia", + QUrl(s))); + 
s.replace("http://en.wikipedia.org/wiki/", + "http://dbpedia.org/resource/"); + store->add(Triple(u, + "owl:sameAs", + QUrl(s))); + } +} + +int main(int argc, char **argv) +{ + qRegisterMetaType<HistoricalEvent *> + ("ClassicalData::HistoricalEvent*"); + qRegisterMetaType<Birth *> + ("ClassicalData::Birth*"); + qRegisterMetaType<Death *> + ("ClassicalData::Death*"); + qRegisterMetaType<Composition *> + ("ClassicalData::Composition*"); + qRegisterMetaType<Work *> + ("ClassicalData::Work*"); + qRegisterMetaType<Movement *> + ("ClassicalData::Movement*"); + qRegisterMetaType<Composer *> + ("ClassicalData::Composer*"); + qRegisterMetaType<Document *> + ("ClassicalData::Document*"); + qRegisterMetaType<Form *> + ("ClassicalData::Form*"); + qRegisterMetaType<QSet<Work *> > + ("QSet<ClassicalData::Work*>"); + qRegisterMetaType<QSet<Movement *> > + ("QSet<ClassicalData::Movement*>"); + qRegisterMetaType<QSet<Document *> > + ("QSet<ClassicalData::Document*>"); + qRegisterMetaType<QSet<Form *> > + ("QSet<ClassicalData::Form*>"); + qRegisterMetaType<QSet<QString> > + ("QSet<QString>"); + + qRegisterMetaType<ClassicalComposersOrgImporter *> + ("ClassicalData::ClassicalComposersOrgImporter*"); + qRegisterMetaType<ClassicalDotNetImporter *> + ("ClassicalData::ClassicalDotNetImporter*"); + qRegisterMetaType<WikipediaComposersImporter *> + ("ClassicalData::WikipediaComposersImporter*"); + qRegisterMetaType<WikipediaWorksImporter *> + ("ClassicalData::WikipediaWorksImporter*"); + qRegisterMetaType<WikipediaWorksKImporter *> + ("ClassicalData::WikipediaWorksKImporter*"); + qRegisterMetaType<WikipediaWorksListImporter *> + ("ClassicalData::WikipediaWorksListImporter*"); + qRegisterMetaType<HobokenImporter *> + ("ClassicalData::HobokenImporter*"); + + ObjectBuilder::getInstance()->registerClass + <HistoricalEvent>("ClassicalData::HistoricalEvent*"); + ObjectBuilder::getInstance()->registerClass + <Birth>("ClassicalData::Birth*"); + ObjectBuilder::getInstance()->registerClass + 
<Death>("ClassicalData::Death*"); + ObjectBuilder::getInstance()->registerClass + <Composition>("ClassicalData::Composition*"); + ObjectBuilder::getInstance()->registerClass + <Work, QObject>("ClassicalData::Work*"); + ObjectBuilder::getInstance()->registerClass + <Movement, QObject>("ClassicalData::Movement*"); + ObjectBuilder::getInstance()->registerClass + <Composer, QObject>("ClassicalData::Composer*"); + ObjectBuilder::getInstance()->registerClass + <Document, QObject>("ClassicalData::Document*"); + ObjectBuilder::getInstance()->registerClass + <Form, QObject>("ClassicalData::Form*"); + + ObjectBuilder::getInstance()->registerClass + <ClassicalComposersOrgImporter>("ClassicalData::ClassicalComposersOrgImporter*"); + ObjectBuilder::getInstance()->registerClass + <ClassicalDotNetImporter>("ClassicalData::ClassicalDotNetImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaComposersImporter>("ClassicalData::WikipediaComposersImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaWorksImporter>("ClassicalData::WikipediaWorksImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaWorksKImporter>("ClassicalData::WikipediaWorksKImporter*"); + ObjectBuilder::getInstance()->registerClass + <WikipediaWorksListImporter>("ClassicalData::WikipediaWorksListImporter*"); + ObjectBuilder::getInstance()->registerClass + <HobokenImporter>("ClassicalData::HobokenImporter*"); + + ContainerBuilder::getInstance()->registerContainer + <QString, QSet<QString> > + ("QString", "QSet<QString>", ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Work*, QSet<Work*> > + ("ClassicalData::Work*", "QSet<ClassicalData::Work*>", + ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Movement*, QSet<Movement*> > + ("ClassicalData::Movement*", "QSet<ClassicalData::Movement*>", + ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Document*, 
QSet<Document*> > + ("ClassicalData::Document*", "QSet<ClassicalData::Document*>", + ContainerBuilder::SetKind); + + ContainerBuilder::getInstance()->registerContainer + <Form*, QSet<Form*> > + ("ClassicalData::Form*", "QSet<ClassicalData::Form*>", + ContainerBuilder::SetKind); + + BasicStore *store = BasicStore::load("file:importers.ttl"); + ObjectMapper mapper(store); + QObject *parentObject = mapper.loadAllObjects(new QObject()); + + BasicStore *outstore = new BasicStore(); + ObjectMapper outmapper(outstore); + + outmapper.setPropertyStorePolicy(ObjectMapper::StoreIfChanged); + + outstore->addPrefix("type", outmapper.getObjectTypePrefix()); + outstore->addPrefix("classical", outmapper.getObjectTypePrefix() + "ClassicalData/"); + outstore->addPrefix("property", outmapper.getPropertyPrefix()); + outstore->addPrefix("rel", outmapper.getRelationshipPrefix()); + outstore->addPrefix("foaf", "http://xmlns.com/foaf/0.1/"); + outstore->addPrefix("mo", "http://purl.org/ontology/mo/"); + outstore->addPrefix("dc", "http://purl.org/dc/elements/1.1/"); + outstore->addPrefix("bio", "http://purl.org/vocab/bio/0.1/"); + outstore->addPrefix("owl", "http://www.w3.org/2002/07/owl#"); + outstore->addPrefix("rdfs", "http://www.w3.org/2000/01/rdf-schema#"); + + outmapper.addPropertyMapping("ClassicalData::Composer", "pages", + outstore->expand("foaf:page")); + outmapper.addPropertyMapping("ClassicalData::Composer", "name", + outstore->expand("foaf:name")); + outmapper.addPropertyMapping("ClassicalData::Composer", "aliases", + outstore->expand("property:also_known_as")); + outmapper.addPropertyMapping("ClassicalData::Document", "topic", + outstore->expand("foaf:primaryTopic")); + + outmapper.addTypeMapping("ClassicalData::Work", + outstore->expand("mo:MusicalWork")); + outmapper.addPropertyMapping("ClassicalData::Work", "composition", + outstore->expand("mo:composed_in")); + outmapper.addPropertyMapping("ClassicalData::Work", "opus", + outstore->expand("mo:opus")); + 
outmapper.addPropertyMapping("ClassicalData::Work", "k6", + outstore->expand("mo:k6")); + outmapper.addPropertyMapping("ClassicalData::Work", "bwv", + outstore->expand("mo:bwv")); + outmapper.addPropertyMapping("ClassicalData::Work", "number", + outstore->expand("mo:number")); + outmapper.addPropertyMapping("ClassicalData::Work", "partOf", + outstore->expand("dc:isPartOf")); + outmapper.addPropertyMapping("ClassicalData::Work", "parts", + outstore->expand("dc:hasPart")); + outmapper.addPropertyMapping("ClassicalData::Work", "pages", + outstore->expand("foaf:page")); + outmapper.addPropertyMapping("ClassicalData::Work", "forms", + outstore->expand("property:form")); + outmapper.addPropertyMapping("ClassicalData::Work", "key", + outstore->expand("mo:key")); + outmapper.addPropertyMapping("ClassicalData::Work", "aliases", + outstore->expand("property:also_known_as")); + outmapper.addPropertyMapping("ClassicalData::Work", "name", + outstore->expand("dc:title")); + + outmapper.addTypeMapping("ClassicalData::Composition", + outstore->expand("mo:Composition")); + outmapper.addPropertyMapping("ClassicalData::Composition", "composer", + outstore->expand("mo:composer")); + outmapper.addPropertyMapping("ClassicalData::Composition", "works", + outstore->expand("mo:produced_work")); + + outstore->add(Triple("classical:Composer", "a", + outstore->expand("owl:Class"))); + outstore->add(Triple("classical:Composer", "rdfs:subClassOf", + outstore->expand("mo:MusicArtist"))); + + QList<Importer *> importers = parentObject->findChildren<Importer *>(); + std::cerr << "have " << importers.size() << " importers" << std::endl; + + ComposerMap composers; + + QList<Composer *> dated; + QList<Composer *> undated; + + QList<Work *> works; + QList<Composition *> compositions; + QList<QObject *> other; + + foreach (Importer *importer, importers) { + QObjectList objects = importer->getImportedObjects(); + foreach (QObject *o, objects) { + Composer *c; + if ((c = qobject_cast<Composer *>(o))) { + 
addMiscExpansions(c); + asciify(c); + if (c->birth() || c->death()) dated.push_back(c); + else undated.push_back(c); + continue; + } + Work *w; + if ((w = qobject_cast<Work *>(o))) { + asciify(w); + works.push_back(w); + continue; + } + Composition *cn; + if ((cn = qobject_cast<Composition *>(o))) { + compositions.push_back(cn); + continue; + } + } + } + + // get all the dated composers merged before attempting to match + // the undated ones + foreach (Composer *c, dated) { + mergeComposer(c, composers); + } + foreach (Composer *c, undated) { + mergeComposer(c, composers); + } + + QObjectList toStore; + + QSet<Composer *> cset; + for (ComposerMap::iterator i = composers.begin(); i != composers.end(); ++i) { + foreach (Composer *c, i.value()) { + if (!cset.contains(c)) { + assignUri(outstore, c); + toStore.push_back(c); + cset.insert(c); + } + foreach (Document *d, c->pages()) { + QString s = d->uri().toString(); + addDbpediaResource(outstore, c, s); + } + } + } + + QSet<QString> storedUris; + + foreach (Work *w, works) { + Composition *cn = w->composition(); + if (!cn) continue; + if (!cn->composer()) { + QString cname = cn->composerName(); + if (cname != "") { + if (!composers.contains(cname.toLower())) { + DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; + } else { + QSet<Composer *> cs = composers[cname.toLower()]; + if (cs.empty()) { + DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; + } else if (cs.size() > 1) { + DEBUG << "Failed to assign Composition to composer: " + << cs.size() << " composers match name " << cname << endl; + } else { + cn->setComposer(*cs.begin()); + } + } + } else { + DEBUG << "Failed to assign Composition to composer: composer name is empty" << endl; + } + } + + if (cn->composer()) { + assignUri(outstore, w, cn->composer()); + } + + foreach (Document *d, w->pages()) { + QString s = d->uri().toString(); + addDbpediaResource(outstore, w, s); + 
toStore.push_back(d); + } + + QString u = w->property("uri").toUrl().toString(); + if (u == "" || !storedUris.contains(u)) { + toStore.push_back(w); + if (u != "") storedUris.insert(u); + } + } + + try { + outmapper.storeAllObjects(toStore); + + } catch (RDFException e) { + std::cerr << "Caught RDF exception: " << e.what() << std::endl; + } + + DEBUG << "Stored, now saving" << endl; + + outstore->save("test-out.ttl"); + + DEBUG << "Saved" << endl; + + + QMultiMap<QString, Composer *> cmap; + foreach (Composer *c, cset) { + QString n = c->getSortName(true); + cmap.insert(n, c); + } + + std::cout << "Composers: " << cmap.size() << std::endl; + + for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); + i != cmap.end(); ++i) { + + QString n = i.key(); + Composer *c = i.value(); + + std::cout << n.toStdString(); + + QString d = c->getDisplayDates(); + if (d != "") std::cout << " (" << d.toStdString() << ")"; + std::cout << std::endl; + } + + std::cout << std::endl; + + std::cout << "Works by composer:" << std::endl; + + for (QMultiMap<QString, Composer *>::iterator i = cmap.begin(); + i != cmap.end(); ++i) { + + QString n = i.key(); + Composer *c = i.value(); + + std::set<Work *, Work::Ordering> wmap; + foreach (Work *w, works) { + Composition *cn = w->composition(); + if (!cn) continue; + if (cn->composer() != c) continue; + if (w->partOf()) continue; + wmap.insert(w); + } + + if (wmap.empty()) continue; + + std::cout << n.toStdString() << std::endl; + + foreach (Work *w, wmap) { + std::cout << " * "; + std::cout << w->name().toStdString(); + if (w->catalogue() != "") { + std::cout << " [" << w->catalogue().toStdString() << "]"; + } + if (w->opus() != "") { + std::cout << " [op. 
" << w->opus().toStdString() << "]"; + } + std::cout << std::endl; + std::set<Work *, Work::Ordering> orderedParts; + foreach (Work *ww, w->parts()) { + orderedParts.insert(ww); + } + foreach (Work *ww, orderedParts) { + std::cout << " "; + if (ww->number() != "") { + std::cout << ww->number().toStdString() << ". "; + } + std::cout << ww->name().toStdString(); + if (ww->catalogue() != "" && ww->catalogue() != w->catalogue()) { + std::cout << " [" << ww->catalogue().toStdString() << "]"; + } + if (ww->opus() != "" && ww->opus() != w->opus()) { + std::cout << " [op. " << ww->opus().toStdString() << "]"; + } + std::cout << std::endl; + } + } + + std::cout << std::endl; + } + + delete outstore; + + DEBUG << "Done" << endl; + + +} + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/import/importers.ttl Tue Dec 01 17:50:41 2009 +0000 @@ -0,0 +1,709 @@ +@base <#> . +@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> . +@prefix classical: <http://breakfastquay.com/rdf/dataquay/objectmapper/type/ClassicalData/> . +@prefix property: <http://breakfastquay.com/rdf/dataquay/objectmapper/property/> . +@prefix : <> . + + +:cdn a classical:ClassicalDotNetImporter ; + property:source <file:source-data/classical.net/masterindex.php> . + +:wkiA a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/List_of_Renaissance_composers> . + +:wkiM a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/Medieval_composers> . + +:wkiB a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/List_of_Baroque_composers> . + +:wkiC a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/List_of_Classical_era_composers> . + +:wkiD a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/List_of_Romantic-era_composers> . + +:wkiE a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/List_of_20th-century_classical_composers_by_birth_date> . + +:wkiF a classical:WikipediaComposersImporter ; + property:source <file:source-data/en.wikipedia.org/composers/List_of_21st-century_classical_composers_by_birth_date> . + +:cciA a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstA> . + +:cciB a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstB> . + +:cciC a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstC> . 
+ +:cciD a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstD> . + +:cciE a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstE> . + +:cciF a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstF> . + +:cciG a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstG> . + +:cciH a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstH> . + +:cciI a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstI> . + +:cciJ a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstJ> . + +:cciK a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstK> . + +:cciL a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstL> . + +:cciM a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstM> . + +:cciN a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstN> . + +:cciO a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstO> . + +:cciP a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstP> . + +:cciQ a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstQ> . + +:cciR a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstR> . + +:cciS a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstS> . 
+ +:cciT a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstT> . + +:cciU a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstU> . + +:cciV a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstV> . + +:cciW a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstW> . + +:cciX a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstX> . + +:cciY a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstY> . + +:cciZ a classical:ClassicalComposersOrgImporter ; + property:source <file:source-data/classical-composers.org/firstZ> . + + +# These ones need more work: +#:wki_216 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Antonio_Vivaldi> . + +# mangling of Mazeppa +#:wki_345 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Pyotr_Ilyich_Tchaikovsky> . + +# classification levels all wrong (many works listed at ** level) +#:wki_354 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Sergei_Prokofiev> . + +# tabular form +#:wki_201 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_chorale_harmonisations_by_Johann_Sebastian_Bach> . + + +:hob a classical:HobokenImporter ; + property:source <file:source-data/hoboken-klassika/sorted-with-hrefs-and-hob> . + + +:wki_198 a classical:WikipediaWorksKImporter ; + property:source <file:source-data/en.wikipedia.org/works/K%C3%B6chel_catalogue> . 
+ +## metapage only +##:wki_199 a classical:WikipediaWorksListImporter ; +## property:source <file:source-data/en.wikipedia.org/works/List_of_Schubert_compositions_by_D_number> . + +:wki_200 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_cantatas_by_Johann_Sebastian_Bach> . + + +:wki_202 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_and_literary_works_by_Hector_Berlioz> . + + +:wki_203 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Aaron_Copland> . + + +#:wki_204 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Akil_Mark_Koci> . + + +:wki_205 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Alban_Berg> . + + +:wki_206 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Alexander_Borodin> . + + +:wki_207 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Alexander_Glazunov> . + + +:wki_208 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Alexander_Scriabin> . + +#:wki_209 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Anton%C3%ADn_Dvo%C5%99%C3%A1k> . + +# tabular +#:wki_210 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Anton%C3%ADn_Dvo%C5%99%C3%A1k_by_Burghauser_number> . + +:wki_211 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Anton_Bruckner> . 
+ +:wki_212 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Anton_Diabelli> . + +:wki_213 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Anton_Reicha> . + +:wki_214 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Anton_Rubinstein> . + + +# messy, doesn't work -- do we care? +:wki_215 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Antonio_Salieri> . + + +:wki_217 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Aram_Khachaturian> . + +:wki_218 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Arthur_Bliss> . + +:wki_219 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Arthur_Honegger> . + +:wki_220 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_B%C3%A9la_Bart%C3%B3k> . + +:wki_221 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Bed%C5%99ich_Smetana> . + +:wki_222 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Benjamin_Britten> . + +:wki_223 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Bohuslav_Martin%C5%AF> . + +:wki_224 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_C%C3%A9sar_Cui> . 
+ +:wki_225 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_C%C3%A9sar_Franck> . + +:wki_226 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Camille_Saint-Sa%C3%ABns> . + +:wki_227 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Carl_Czerny> . + +:wki_228 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Carl_Maria_von_Weber> . + +:wki_229 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Carl_Nielsen> . + +:wki_230 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Carl_Philipp_Emanuel_Bach> . + +#:wki_231 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Carlo_d%27Ordonez> . + +:wki_232 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Charles-Valentin_Alkan> . + +:wki_233 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Charles_Ives> . + +:wki_234 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Charles_Wuorinen> . + +:wki_235 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Claude_Debussy> . + +:wki_236 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Claude_Debussy_by_Lesure_Numbers> . 
+ +:wki_237 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Constant_Lambert> . + +:wki_238 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Darius_Milhaud> . + +:wki_239 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Daron_Hagen> . + +:wki_240 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_David_Maslanka> . + +:wki_241 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Dieterich_Buxtehude> . + +:wki_242 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Django_Reinhardt> . + +:wki_243 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Dmitri_Shostakovich> . + +:wki_244 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Edvard_Grieg> . + +:wki_245 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Edward_Elgar> . + +:wki_246 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Einojuhani_Rautavaara> . + +#:wki_247 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Emilio_Pujol> . + +:wki_248 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Erik_Satie> . 
+ +:wki_249 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Ernest_Chausson> . + +:wki_250 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Eug%C3%A8ne_Bozza> . + +#:wki_251 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Fabio_Vacchi> . + +:wki_252 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Fanny_Mendelssohn> . + +:wki_253 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Felix_Mendelssohn> . + +:wki_254 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Ferdinando_Carulli> . + +:wki_255 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Fernando_Sor> . + +:wki_256 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Fr%C3%A9d%C3%A9ric_Chopin> . + +:wki_257 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Fran%C3%A7ois_Couperin> . + +:wki_258 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Francisco_Tarrega> . + +:wki_259 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Franz_Lachner> . + +:wki_260 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Franz_Liszt_(S.1_-_S.350)> . 
+ +:wki_261 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Franz_Liszt_(S.351_-_S.999)> . + +:wki_262 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Franz_Schubert> . + +:wki_263 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Friedrich_Kuhlau> . + +:wki_264 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Fritz_Kreisler> . + +:wki_265 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Gabriel_Faur%C3%A9> . + +:wki_266 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Georg_B%C3%B6hm> . + +:wki_267 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_George_Enescu> . + +:wki_268 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_George_Frideric_Handel> . + +:wki_269 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_George_Gershwin> . + +:wki_270 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_George_Onslow> . + +:wki_271 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Georges_Bizet> . + +:wki_272 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Germaine_Tailleferre> . 
+ +:wki_273 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Giacinto_Scelsi> . + +:wki_274 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Gioachino_Rossini> . + +:wki_275 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Giovanni_Bottesini> . + +:wki_276 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Giovanni_Pierluigi_da_Palestrina> . + +:wki_277 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Girolamo_Frescobaldi> . + +:wki_278 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Giuseppe_Verdi> . + +:wki_279 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Gregory_Short> . + +:wki_280 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Guillaume_de_Machaut> . + +:wki_281 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Gustav_Holst> . + +:wki_282 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Harrison_Birtwistle> . + +:wki_283 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Heinrich_Sch%C3%BCtz> . + +:wki_284 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Heitor_Villa-Lobos> . 
+ +:wki_285 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Henry_Purcell> . + +:wki_286 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Henryk_G%C3%B3recki> . + +:wki_287 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Herbert_Howells> . + +:wki_288 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Howard_Skempton> . + +:wki_289 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Iannis_Xenakis> . + +:wki_290 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Igor_Stravinsky> . + +:wki_291 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_James_Scott> . + +:wki_292 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Jan_Ladislav_Dussek> . + +:wki_293 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Jean-Baptiste_Lully> . + +:wki_294 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Jean_Sibelius> . + +:wki_295 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Jennifer_Higdon> . + +:wki_296 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Adolph_Hasse> . 
+ +:wki_297 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Christian_Bach> . + +:wki_298 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Jakob_Froberger> . + +:wki_299 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Joachim_Quantz> . + +:wki_300 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Nepomuk_Hummel> . + +:wki_301 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Pachelbel> . + +:wki_302 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Sebastian_Bach> . + +:wki_303 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johann_Strauss_II> . + +:wki_304 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Johannes_Brahms> . + +:wki_305 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_John_Cage> . + +:wki_306 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_John_Philip_Sousa> . + +:wki_307 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Josef_Suk> . + +:wki_308 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Joseph_Martin_Kraus> . 
+ +#:wki_309 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Juan_Mar%C3%ADa_Solare> . + +:wki_310 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Karlheinz_Stockhausen> . + +:wki_311 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Karol_Szymanowski> . + +:wki_312 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Krzysztof_Penderecki> . + +:wki_313 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Kurt_Atterberg> . + +:wki_314 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_L%C3%A9o_Delibes> . + +:wki_315 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Leo%C5%A1_Jan%C3%A1%C4%8Dek> . + +:wki_316 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Leo_Sowerby> . + +:wki_317 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Lorenzo_Perosi> . + +:wki_318 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Louis_Couperin> . + +:wki_319 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Luciano_Berio> . + +:wki_320 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Ludwig_van_Beethoven> . 
+ +:wki_321 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Luigi_Boccherini> . + +:wki_322 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Lukas_Foss> . + +:wki_323 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Malcolm_Williamson> . + +#:wki_324 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Matteo_Carcassi> . + +:wki_325 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Maurice_Ravel> . + +:wki_326 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Mauro_Giuliani> . + +:wki_327 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Max_Bruch> . + +:wki_328 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Max_Reger> . + +:wki_329 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Michael_Haydn> . + +:wki_330 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Mieczyslaw_Weinberg> . + +:wki_331 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Mily_Balakirev> . + +:wki_332 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Modest_Mussorgsky> . 
+ +:wki_333 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Moritz_Moszkowski> . + +:wki_334 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Muzio_Clementi> . + +#:wki_335 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Nicholas_Lens> . # who? + +:wki_336 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Nikolai_Kapustin> . + +:wki_337 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Nikolai_Rimsky-Korsakov> . + +:wki_338 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Olivier_Messiaen> . + +:wki_339 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Otto_Albert_Tich%C3%BD> . + +:wki_340 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Paul_Hindemith> . + +:wki_341 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Paul_Juon> . + +:wki_342 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Paul_Moravec> . + +:wki_343 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Philip_Glass> . + +:wki_344 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Pierre_Boulez> . 
+ + +:wki_346 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Rebecca_Clarke> . + +:wki_347 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Reynaldo_Hahn> . + +:wki_348 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Richard_Strauss> . + +:wki_349 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Richard_Wagner> . + +:wki_350 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Robert_Schumann> . + +:wki_351 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Robert_Simpson> . + +:wki_352 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Samuel_Barber> . + +:wki_353 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Scott_Joplin> . + + +:wki_355 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Sergei_Rachmaninoff> . + +:wki_356 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Sergei_Taneyev> . + +:wki_357 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Sigismond_Thalberg> . + +:wki_358 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Simon_Mayr> . 
+ +:wki_359 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_T%C5%8Dru_Takemitsu> . + +:wki_360 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Takashi_Yoshimatsu> . + +:wki_361 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Thomas_Ad%C3%A8s> . + +:wki_362 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Thomas_Arne> . + +:wki_363 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Thomas_Tallis> . + +:wki_364 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Tomaso_Albinoni> . + +:wki_365 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Victor_Herbert> . + +:wki_366 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Vincent_d%27Indy> . + +#:wki_367 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Wilhelm_Kienzl> . + +:wki_368 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_William_Byrd> . + +:wki_369 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Witold_Lutos%C5%82awski> . + +:wki_370 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Wolfgang_Amadeus_Mozart> . 
+ +:wki_371 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_compositions_by_Zden%C4%9Bk_Fibich> . + +:wki_372 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_concert_arias,_songs_and_canons_by_Wolfgang_Amadeus_Mozart> . + +:wki_373 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_dances_and_marches_by_Ziehrer> . + +:wki_374 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_fugal_works_by_Johann_Sebastian_Bach> . + +:wki_375 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_musical_compositions_by_Arthur_Sullivan> . + +:wki_376 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_operas_by_Handel> . + +:wki_377 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_operas_by_Mayr> . + +#:wki_378 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_recorded_compositions_by_Ferruccio_Busoni> . + +:wki_379 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_solo_piano_compositions_by_Felix_Mendelssohn> . + +:wki_380 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_solo_piano_compositions_by_Franz_Schubert> . + +:wki_381 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_solo_piano_compositions_by_Johannes_Brahms> . + +:wki_382 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_solo_piano_compositions_by_Robert_Schumann> . 
+ +:wki_383 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_solo_piano_compositions_by_Wolfgang_Amadeus_Mozart> . + +:wki_384 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_songs_and_arias_of_Johann_Sebastian_Bach> . + +#:wki_386 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_symphonies_in_F_major> . + +:wki_387 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/List_of_works_by_Karl_von_Ordonez> . + +#:wki_388 a classical:WikipediaWorksListImporter ; +# property:source <file:source-data/en.wikipedia.org/works/List_of_works_for_the_stage_by_Wagner> . + +:wki_389 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/Schubert_compositions_D_number_1-504> . + +:wki_390 a classical:WikipediaWorksListImporter ; + property:source <file:source-data/en.wikipedia.org/works/Schubert_compositions_D_number_505-998> . +