# HG changeset patch # User Chris Cannam # Date 1267198015 0 # Node ID 2b574b88778ed6ca7d266170e02278b2b7119c78 # Parent 437442790e51036cba2da0263c42c40a430218c6 * Add sameAs record to composer &c * Add merge facility to composer * Sort ntriples database file diff -r 437442790e51 -r 2b574b88778e common/Objects.cpp --- a/common/Objects.cpp Fri Feb 26 11:26:16 2010 +0000 +++ b/common/Objects.cpp Fri Feb 26 15:26:55 2010 +0000 @@ -539,6 +539,45 @@ return score; } +void +Composer::mergeFrom(Composer *c) +{ + QString name = c->name(); + QSet allNames = c->aliases(); + allNames.insert(name); + + foreach (QString n, allNames) { + if (n != m_name && !m_aliases.contains(n)) { + m_aliases.insert(n); + m_namesCached = false; + } + } + + if (!m_birth) { + if (c->birth()) m_birth = new Birth(*c->birth()); + } + + if (!m_death) { + if (c->death()) m_death = new Death(*c->death()); + } + + if (c->gender() != "") { + if (m_gender == "") { + m_gender = c->gender(); + } else if (c->gender() != m_gender) { + std::cerr << "WARNING: Composer::mergeFrom: Gender mismatch! Composer " << c->name().toStdString() << " has gender " << c->gender().toStdString() << ", but target composer " << m_name.toStdString() << " has gender " << m_gender.toStdString() << std::endl; + } + } + + m_nationality.unite(c->nationality()); + m_geonameURIs.unite(c->geonameURIs()); + m_otherURIs.unite(c->otherURIs()); + m_pages.unite(c->pages()); + + if (m_period == "") m_period = c->period(); + if (m_remarks == "") m_remarks = c->remarks(); +} + static int compare(QString a, QString b) { diff -r 437442790e51 -r 2b574b88778e common/Objects.h --- a/common/Objects.h Fri Feb 26 11:26:16 2010 +0000 +++ b/common/Objects.h Fri Feb 26 15:26:55 2010 +0000 @@ -16,6 +16,8 @@ #include #include +#include + namespace ClassicalData { class Year @@ -29,10 +31,12 @@ struct Encoder : public Dataquay::Node::VariantEncoder { QString fromVariant(const QVariant &v) { - return QString("%1").arg(v.value().toInt()); + QString s = QString("%1").arg(v.value().toInt()); + return s; } QVariant toVariant(const QString &s) { - return QVariant::fromValue(s.toInt()); + QVariant v = QVariant::fromValue(s.toInt()); + return v; } }; @@ -52,6 +56,7 @@ HistoricalEvent() : m_year(0), m_place(), m_approximate(false) { } HistoricalEvent(Year y) : m_year(y), m_approximate(false) { } HistoricalEvent(Year y, QString p) : m_year(y), m_place(p), m_approximate(false) { } + HistoricalEvent(const HistoricalEvent &h) : QObject(), m_year(h.m_year), m_place(h.m_place), m_approximate(h.m_approximate) { } Year year() const { return m_year; } void setYear(Year y) { m_year = y; } @@ -154,6 +159,7 @@ Q_PROPERTY(QSet aliases READ aliases WRITE setAliases STORED true) Q_PROPERTY(QString remarks READ remarks WRITE setRemarks STORED true) Q_PROPERTY(QSet pages READ pages WRITE setPages STORED true) + Q_PROPERTY(QSet otherURIs READ otherURIs WRITE setOtherURIs STORED true) public: NamedEntity(QObject *parent = 0) : QObject(parent) { } @@ -172,11 +178,16 @@ void addPage(Document *p) { m_pages.insert(p); } void setPages(QSet p) { m_pages = p; } //!!! destroy old ones? do we own? + QSet otherURIs() const { return m_otherURIs; } + void addOtherURI(Dataquay::Uri u) { m_otherURIs.insert(u); } + void setOtherURIs(QSet u) { m_otherURIs = u; } + protected: QString m_name; QString m_remarks; QSet m_aliases; QSet m_pages; + QSet m_otherURIs; }; class Movement; @@ -402,6 +413,15 @@ float matchTyping(QString text) const; /** + * Merge data from the given composer into this composer record. + * That is, add the composer's name and aliases as aliases of this + * composer, copy its dates where we lack them, etc. In all + * cases, values that exist in this composer already are preferred + * over values from the "other" composer. + */ + void mergeFrom(Composer *c); + + /** * Return the supplied name reduced into a "simplified" form, * eliminating many of the differences often found particularly in * European language names that have been anglicised. Used in diff -r 437442790e51 -r 2b574b88778e common/TypeRegistrar.cpp --- a/common/TypeRegistrar.cpp Fri Feb 26 11:26:16 2010 +0000 +++ b/common/TypeRegistrar.cpp Fri Feb 26 15:26:55 2010 +0000 @@ -138,6 +138,7 @@ mapper->addPropertyMapping("ClassicalData::Composer", "birth", "property:birth"); mapper->addPropertyMapping("ClassicalData::Composer", "death", "property:death"); mapper->addPropertyMapping("ClassicalData::Composer", "geonameURIs", "foaf:based_near"); + mapper->addPropertyMapping("ClassicalData::Composer", "otherURIs", "owl:sameAs"); mapper->addTypeMapping("ClassicalData::Birth", "bio:Birth"); mapper->addTypeMapping("ClassicalData::Death", "bio:Death"); diff -r 437442790e51 -r 2b574b88778e import/build-database.sh --- a/import/build-database.sh Fri Feb 26 11:26:16 2010 +0000 +++ b/import/build-database.sh Fri Feb 26 15:26:55 2010 +0000 @@ -17,7 +17,7 @@ echo "Running importer, log is written to importer.log" -#./importer 2>importer.log || exit 1 +./importer 2>importer.log || exit 1 echo "Assembling additional sources" @@ -34,8 +34,10 @@ cat extra/prefixes.ttl "$ttl" | rapper -i turtle -o ntriples - http://dbtune.org/classical/resource/ >> ready.ntriples done +sort ready.ntriples > ready.2.ntriples && mv ready.2.ntriples ready.ntriples + grep composer ready.ntriples | fgrep -v .html | sed 's/^.*composer\///' | \ - sed 's/>.*//' | sort | uniq > check/new-composer-uris + sed 's/>.*//' | grep -v http | sort | uniq > check/new-composer-uris diff -u check/composer-uris check/new-composer-uris | grep -v '^---' | grep -v '^+++' > /tmp/$$ diff -r 437442790e51 -r 2b574b88778e utilities/composer/composer.cpp --- a/utilities/composer/composer.cpp Fri Feb 26 11:26:16 2010 +0000 +++ b/utilities/composer/composer.cpp Fri Feb 26 15:26:55 2010 +0000 @@ -35,11 +35,16 @@ cerr << "Importing from URL " << url << " ..."; try { - store->import(url, BasicStore::ImportPermitDuplicates, "ntriples"); + store->import(url, BasicStore::ImportPermitDuplicates); } catch (RDFException e) { - cerr << "failed" << endl; - cerr << "Import failed: " << e.what() << endl; - return false; + cerr << " retrying with explicit ntriples type..."; + try { + store->import(url, BasicStore::ImportPermitDuplicates, "ntriples"); + } catch (RDFException e) { + cerr << "failed" << endl; + cerr << "Import failed: " << e.what() << endl; + return false; + } } cerr << " done" << endl; @@ -52,11 +57,13 @@ int s = 0; for (int i = 0; name[i]; ++i) if (name[i] == '/') s = i + 1; name = name + s; - cerr << "Usage: " << name << " list" << endl; - cerr << "Usage: " << name << " list-uris" << endl; - cerr << "Usage: " << name << " show [ ...]" << endl; - cerr << "Usage: " << name << " search " << endl; - cerr << "Usage: " << name << " match " << endl; + cerr << "Usage:" << endl; + cerr << " " << name << " list" << endl; + cerr << " " << name << " list-uris" << endl; + cerr << " " << name << " show [ ...]" << endl; + cerr << " " << name << " search " << endl; + cerr << " " << name << " match " << endl; + cerr << " " << name << " merge [ ...]" << endl; exit(-1); } @@ -98,7 +105,10 @@ cout << " " << c->remarks() << endl; } foreach (Document *d, c->pages()) { - cout << " " << d->siteName() << " -> " << d->uri() << endl; + cout << d->siteName() << " -> " << d->uri() << endl; + } + foreach (Uri u, c->otherURIs()) { + cout << "Same as " << u << endl; } } @@ -205,17 +215,60 @@ showSearchResults(matches, 5); } +QList +matchWildcard(QString text) +{ + if (!text.contains('/') && !text.contains('*')) { + text = "*" + text + "*"; + } + QRegExp re(text, Qt::CaseInsensitive, QRegExp::Wildcard); + QList results; + foreach (Composer *c, allComposers) { + if (re.exactMatch(c->property("uri").value().toString())) { + results.push_back(c); + } + } + return results; +} + +Composer * +matchSingle(QString text) +{ + QList matches = matchWildcard(text); + if (matches.empty()) { + cerr << "matchSingle: No matches for " << text << endl; + return 0; + } else if (matches.size() > 1) { + cerr << "matchSingle: Multiple matches for " << text << endl; + return 0; + } + return matches[0]; +} + void showWildcard(QString text) { cout << "Showing URI or wildcard: " << text << endl; - QRegExp re(text, Qt::CaseInsensitive, QRegExp::Wildcard); - foreach (Composer *c, allComposers) { - if (re.exactMatch(c->property("uri").value().toString())) { - cout << endl; - show(c); - } + cout << endl; + foreach (Composer *c, matchWildcard(text)) { + show(c); + cout << endl; } +} + +void +merge(Composer *target, QList sources) +{ + cout << "Merging into this composer record:" << endl << endl; + show(target); + cout << endl << "... the following composer record(s):" << endl; + foreach (Composer *c, sources) { + cout << endl; + show(c); + target->mergeFrom(c); + } + cout << endl << "Result after merging:" << endl << endl;; + show(target); cout << endl; } @@ -260,6 +313,8 @@ } } + bool write = false; + if (command == "list") { if (!args.empty()) usage(argv[0]); listBrief(allComposers); @@ -270,9 +325,6 @@ if (args.empty()) usage(argv[0]); if (command == "show") { foreach (QString s, args) { - if (!s.contains('/') && !s.contains('*')) { - s = "*" + s + "*"; - } showWildcard(s); } } else if (command == "search") { @@ -283,8 +335,43 @@ foreach (QString s, args) { match(s); } + } else if (command == "merge") { + if (args.size() < 2) usage(argv[0]); + Composer *target = matchSingle(args[0]); + if (!target) return 1; + QList sources; + for (int i = 1; i < args.size(); ++i) { + Composer *c = matchSingle(args[i]); + if (!c) return 1; + sources.push_back(c); + } + merge(target, sources); + write = true; } } + if (write) { + BasicStore *outstore = new BasicStore(); + outstore->setBaseUri(Uri("http://dbtune.org/classical/resource/")); + ObjectMapper *outmapper = new ObjectMapper(outstore); + + TypeRegistrar::addMappings(outstore, outmapper); + + outmapper->setPropertyStorePolicy(ObjectMapper::StoreIfChanged); + outmapper->setObjectStorePolicy(ObjectMapper::StoreAllObjects); + outmapper->setBlankNodePolicy(ObjectMapper::NoBlankNodes); + + cerr << "Mapping results back to store..."; + outmapper->storeAllObjects(root->children()); + cerr << " done" << endl; + + cerr << "Saving to file out.ttl..."; + outstore->save("out.ttl"); + cerr << " done" << endl; + + delete outmapper; + delete outstore; + } + }