Mercurial > hg > classical
changeset 24:2b574b88778e classical-rdf
* Add sameAs record to composer &c
* Add merge facility to composer
* Sort ntriples database file
author | Chris Cannam |
---|---|
date | Fri, 26 Feb 2010 15:26:55 +0000 |
parents | 437442790e51 |
children | e856df83c57f |
files | common/Objects.cpp common/Objects.h common/TypeRegistrar.cpp import/build-database.sh utilities/composer/composer.cpp |
diffstat | 5 files changed, 172 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/common/Objects.cpp Fri Feb 26 11:26:16 2010 +0000 +++ b/common/Objects.cpp Fri Feb 26 15:26:55 2010 +0000 @@ -539,6 +539,45 @@ return score; } +void +Composer::mergeFrom(Composer *c) +{ + QString name = c->name(); + QSet<QString> allNames = c->aliases(); + allNames.insert(name); + + foreach (QString n, allNames) { + if (n != m_name && !m_aliases.contains(n)) { + m_aliases.insert(n); + m_namesCached = false; + } + } + + if (!m_birth) { + if (c->birth()) m_birth = new Birth(*c->birth()); + } + + if (!m_death) { + if (c->death()) m_death = new Death(*c->death()); + } + + if (c->gender() != "") { + if (m_gender == "") { + m_gender = c->gender(); + } else if (c->gender() != m_gender) { + std::cerr << "WARNING: Composer::mergeFrom: Gender mismatch! Composer " << c->name().toStdString() << " has gender " << c->gender().toStdString() << ", but target composer " << m_name.toStdString() << " has gender " << m_gender.toStdString() << std::endl; + } + } + + m_nationality.unite(c->nationality()); + m_geonameURIs.unite(c->geonameURIs()); + m_otherURIs.unite(c->otherURIs()); + m_pages.unite(c->pages()); + + if (m_period == "") m_period = c->period(); + if (m_remarks == "") m_remarks = c->remarks(); +} + static int compare(QString a, QString b) {
--- a/common/Objects.h Fri Feb 26 11:26:16 2010 +0000 +++ b/common/Objects.h Fri Feb 26 15:26:55 2010 +0000 @@ -16,6 +16,8 @@ #include <QMutexLocker> #include <QMap> +#include <iostream> + namespace ClassicalData { class Year @@ -29,10 +31,12 @@ struct Encoder : public Dataquay::Node::VariantEncoder { QString fromVariant(const QVariant &v) { - return QString("%1").arg(v.value<Year>().toInt()); + QString s = QString("%1").arg(v.value<Year>().toInt()); + return s; } QVariant toVariant(const QString &s) { - return QVariant::fromValue<Year>(s.toInt()); + QVariant v = QVariant::fromValue<Year>(s.toInt()); + return v; } }; @@ -52,6 +56,7 @@ HistoricalEvent() : m_year(0), m_place(), m_approximate(false) { } HistoricalEvent(Year y) : m_year(y), m_approximate(false) { } HistoricalEvent(Year y, QString p) : m_year(y), m_place(p), m_approximate(false) { } + HistoricalEvent(const HistoricalEvent &h) : QObject(), m_year(h.m_year), m_place(h.m_place), m_approximate(h.m_approximate) { } Year year() const { return m_year; } void setYear(Year y) { m_year = y; } @@ -154,6 +159,7 @@ Q_PROPERTY(QSet<QString> aliases READ aliases WRITE setAliases STORED true) Q_PROPERTY(QString remarks READ remarks WRITE setRemarks STORED true) Q_PROPERTY(QSet<ClassicalData::Document*> pages READ pages WRITE setPages STORED true) + Q_PROPERTY(QSet<Dataquay::Uri> otherURIs READ otherURIs WRITE setOtherURIs STORED true) public: NamedEntity(QObject *parent = 0) : QObject(parent) { } @@ -172,11 +178,16 @@ void addPage(Document *p) { m_pages.insert(p); } void setPages(QSet<Document *> p) { m_pages = p; } //!!! destroy old ones? do we own? + QSet<Dataquay::Uri> otherURIs() const { return m_otherURIs; } + void addOtherURI(Dataquay::Uri u) { m_otherURIs.insert(u); } + void setOtherURIs(QSet<Dataquay::Uri> u) { m_otherURIs = u; } + protected: QString m_name; QString m_remarks; QSet<QString> m_aliases; QSet<Document *> m_pages; + QSet<Dataquay::Uri> m_otherURIs; }; class Movement; @@ -402,6 +413,15 @@ float matchTyping(QString text) const; /** + * Merge data from the given composer into this composer record. + * That is, add the composer's name and aliases as aliases of this + * composer, copy its dates where we lack them, etc. In all + * cases, values that exist in this composer already are preferred + * over values from the "other" composer. + */ + void mergeFrom(Composer *c); + + /** * Return the supplied name reduced into a "simplified" form, * eliminating many of the differences often found particularly in * European language names that have been anglicised. Used in
--- a/common/TypeRegistrar.cpp Fri Feb 26 11:26:16 2010 +0000 +++ b/common/TypeRegistrar.cpp Fri Feb 26 15:26:55 2010 +0000 @@ -138,6 +138,7 @@ mapper->addPropertyMapping("ClassicalData::Composer", "birth", "property:birth"); mapper->addPropertyMapping("ClassicalData::Composer", "death", "property:death"); mapper->addPropertyMapping("ClassicalData::Composer", "geonameURIs", "foaf:based_near"); + mapper->addPropertyMapping("ClassicalData::Composer", "otherURIs", "owl:sameAs"); mapper->addTypeMapping("ClassicalData::Birth", "bio:Birth"); mapper->addTypeMapping("ClassicalData::Death", "bio:Death");
--- a/import/build-database.sh Fri Feb 26 11:26:16 2010 +0000 +++ b/import/build-database.sh Fri Feb 26 15:26:55 2010 +0000 @@ -17,7 +17,7 @@ echo "Running importer, log is written to importer.log" -#./importer 2>importer.log || exit 1 +./importer 2>importer.log || exit 1 echo "Assembling additional sources" @@ -34,8 +34,10 @@ cat extra/prefixes.ttl "$ttl" | rapper -i turtle -o ntriples - http://dbtune.org/classical/resource/ >> ready.ntriples done +sort ready.ntriples > ready.2.ntriples && mv ready.2.ntriples ready.ntriples + grep composer ready.ntriples | fgrep -v .html | sed 's/^.*composer\///' | \ - sed 's/>.*//' | sort | uniq > check/new-composer-uris + sed 's/>.*//' | grep -v http | sort | uniq > check/new-composer-uris diff -u check/composer-uris check/new-composer-uris | grep -v '^---' | grep -v '^+++' > /tmp/$$
--- a/utilities/composer/composer.cpp Fri Feb 26 11:26:16 2010 +0000 +++ b/utilities/composer/composer.cpp Fri Feb 26 15:26:55 2010 +0000 @@ -35,11 +35,16 @@ cerr << "Importing from URL " << url << " ..."; try { - store->import(url, BasicStore::ImportPermitDuplicates, "ntriples"); + store->import(url, BasicStore::ImportPermitDuplicates); } catch (RDFException e) { - cerr << "failed" << endl; - cerr << "Import failed: " << e.what() << endl; - return false; + cerr << " retrying with explicit ntriples type..."; + try { + store->import(url, BasicStore::ImportPermitDuplicates, "ntriples"); + } catch (RDFException e) { + cerr << "failed" << endl; + cerr << "Import failed: " << e.what() << endl; + return false; + } } cerr << " done" << endl; @@ -52,11 +57,13 @@ int s = 0; for (int i = 0; name[i]; ++i) if (name[i] == '/') s = i + 1; name = name + s; - cerr << "Usage: " << name << " <input-rdf-file> list" << endl; - cerr << "Usage: " << name << " <input-rdf-file> list-uris" << endl; - cerr << "Usage: " << name << " <input-rdf-file> show <uri> [<uri> ...]" << endl; - cerr << "Usage: " << name << " <input-rdf-file> search <text>" << endl; - cerr << "Usage: " << name << " <input-rdf-file> match <text>" << endl; + cerr << "Usage:" << endl; + cerr << " " << name << " <input-rdf-file> list" << endl; + cerr << " " << name << " <input-rdf-file> list-uris" << endl; + cerr << " " << name << " <input-rdf-file> show <uri> [<uri> ...]" << endl; + cerr << " " << name << " <input-rdf-file> search <text>" << endl; + cerr << " " << name << " <input-rdf-file> match <text>" << endl; + cerr << " " << name << " <input-rdf-file> merge <target-uri> <dup> [<dup> ...]" << endl; exit(-1); } @@ -98,7 +105,10 @@ cout << " " << c->remarks() << endl; } foreach (Document *d, c->pages()) { - cout << " " << d->siteName() << " -> " << d->uri() << endl; + cout << d->siteName() << " -> " << d->uri() << endl; + } + foreach (Uri u, c->otherURIs()) { + cout << "Same as " << u << endl; } } @@ -205,17 +215,60 @@ showSearchResults(matches, 5); } +QList<Composer *> +matchWildcard(QString text) +{ + if (!text.contains('/') && !text.contains('*')) { + text = "*" + text + "*"; + } + QRegExp re(text, Qt::CaseInsensitive, QRegExp::Wildcard); + QList<Composer *> results; + foreach (Composer *c, allComposers) { + if (re.exactMatch(c->property("uri").value<Uri>().toString())) { + results.push_back(c); + } + } + return results; +} + +Composer * +matchSingle(QString text) +{ + QList<Composer *> matches = matchWildcard(text); + if (matches.empty()) { + cerr << "matchSingle: No matches for " << text << endl; + return 0; + } else if (matches.size() > 1) { + cerr << "matchSingle: Multiple matches for " << text << endl; + return 0; + } + return matches[0]; +} + void showWildcard(QString text) { cout << "Showing URI or wildcard: " << text << endl; - QRegExp re(text, Qt::CaseInsensitive, QRegExp::Wildcard); - foreach (Composer *c, allComposers) { - if (re.exactMatch(c->property("uri").value<Uri>().toString())) { - cout << endl; - show(c); - } + cout << endl; + foreach (Composer *c, matchWildcard(text)) { + show(c); + cout << endl; } +} + +void +merge(Composer *target, QList<Composer *> sources) +{ + cout << "Merging into this composer record:" << endl << endl; + show(target); + cout << endl << "... the following composer record(s):" << endl; + foreach (Composer *c, sources) { + cout << endl; + show(c); + target->mergeFrom(c); + } + cout << endl << "Result after merging:" << endl << endl;; + show(target); cout << endl; } @@ -260,6 +313,8 @@ } } + bool write = false; + if (command == "list") { if (!args.empty()) usage(argv[0]); listBrief(allComposers); @@ -270,9 +325,6 @@ if (args.empty()) usage(argv[0]); if (command == "show") { foreach (QString s, args) { - if (!s.contains('/') && !s.contains('*')) { - s = "*" + s + "*"; - } showWildcard(s); } } else if (command == "search") { @@ -283,8 +335,43 @@ foreach (QString s, args) { match(s); } + } else if (command == "merge") { + if (args.size() < 2) usage(argv[0]); + Composer *target = matchSingle(args[0]); + if (!target) return 1; + QList<Composer *> sources; + for (int i = 1; i < args.size(); ++i) { + Composer *c = matchSingle(args[i]); + if (!c) return 1; + sources.push_back(c); + } + merge(target, sources); + write = true; } } + if (write) { + BasicStore *outstore = new BasicStore(); + outstore->setBaseUri(Uri("http://dbtune.org/classical/resource/")); + ObjectMapper *outmapper = new ObjectMapper(outstore); + + TypeRegistrar::addMappings(outstore, outmapper); + + outmapper->setPropertyStorePolicy(ObjectMapper::StoreIfChanged); + outmapper->setObjectStorePolicy(ObjectMapper::StoreAllObjects); + outmapper->setBlankNodePolicy(ObjectMapper::NoBlankNodes); + + cerr << "Mapping results back to store..."; + outmapper->storeAllObjects(root->children()); + cerr << " done" << endl; + + cerr << "Saving to file out.ttl..."; + outstore->save("out.ttl"); + cerr << " done" << endl; + + delete outmapper; + delete outstore; + } + }