Mercurial > hg > classical
view testapp/Loader.cpp @ 10:d35e5d769c87 classical-rdf
* some experiments with composer name matching
author | Chris Cannam |
---|---|
date | Wed, 17 Feb 2010 19:26:48 +0000 |
parents | 9e2b203254ab |
children | 98047b91b09d |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

#include "Objects.h"
#include "TypeRegistrar.h"

#include <dataquay/BasicStore.h>
#include <dataquay/objectmapper/ObjectMapper.h>
#include <dataquay/Debug.h>

#include <QTemporaryFile>
#include <QMultiMap>

#include <iostream>

using namespace Dataquay;
using namespace ClassicalData;

/**
 * Copy the file named by resourceName into a temporary file on disk
 * and import that temporary file into the given store as "ntriples".
 *
 * The copy is made because the store is handed a "file://" URL built
 * from the temporary file's name (presumably resourceName may refer
 * to something that is not directly reachable by such a URL, e.g. a
 * Qt resource -- confirm against callers).
 *
 * @param store         Store to import into; must not be null.
 * @param resourceName  Name of the file to read.
 * @return true if the data was copied and import() was called; false
 *         if either file could not be opened or the copy failed.
 */
bool
load(BasicStore *store, QString resourceName)
{
    QTemporaryFile tf;
    if (!tf.open()) return false;
    tf.setAutoRemove(true);

    QFile f(resourceName);
    if (!f.open(QFile::ReadOnly)) return false;

    const int bufsiz = 10240;
    while (!f.atEnd()) {
        QByteArray buffer = f.read(bufsiz);
        // An empty read before end-of-file means a read error; bail
        // out rather than looping forever.
        if (buffer.isEmpty()) return false;
        if (tf.write(buffer) != buffer.size()) return false;
    }

    // The import below re-opens the file by name, so anything still
    // held in the QFile write buffer must reach the disk first.
    if (!tf.flush()) return false;

    std::cerr << "unpacked, importing..." << std::endl;

    store->import("file://" + tf.fileName(),
                  BasicStore::ImportPermitDuplicates, // fastest mode
                  "ntriples");

    return true;
}

//!!! both nasty and duplicated from Import.cpp

/**
 * Reduce a name to a crude comparison key: the name is lower-cased,
 * apostrophes are removed, and a fixed sequence of substitutions is
 * applied so that some spelling variants collapse to the same key.
 *
 * The substitutions are applied in order and each one sees the result
 * of the previous ones (so, for example, "x" becomes "ks" and then
 * "cs").  The order must not be changed without also changing the
 * copy in Import.cpp.
 */
QString
makeNameKey(QString name)
{
    QString key = name.toLower()
        .replace("'", "")
        .replace("x", "ks")
        .replace("y", "i")
        .replace("k", "c")
        .replace("ch", "c")
        .replace("cc", "c")
        .replace("v", "f")
        .replace("ff", "f")
        .replace("th", "t")
        .replace("tch", "ch")
        .replace("er", "r");
//    DEBUG << "makeNameKey(" << name << "): " << key << endl;
    return key;
}

/**
 * Return true if s is non-empty and its first character is upper
 * case.  Empty strings (which arise when a name contains consecutive
 * separators) count as not capitalised.
 */
static bool
beginsUpperCase(const QString &s)
{
    return !s.isEmpty() && s.at(0).isUpper();
}

/**
 * Decide whether the name an plausibly refers to composer b.
 *
 * An exact match against b's name or any of its aliases succeeds
 * immediately.  Otherwise both names are split on spaces and hyphens
 * and compared by name key (see makeNameKey):
 *
 *  - The surname is taken to be the first word if the name contains a
 *    comma, otherwise the last word.
 *  - Each word of an that appears among the keys of b's name scores 2
 *    if it is capitalised and is not "Della", otherwise 1.
 *  - If neither of the first two words of an matches, the result is
 *    false.
 *
 * @param an  Name to test.
 * @param b   Composer to test against; a null pointer never matches.
 * @return true only if the (capitalised, non-"Della") surname of an
 *         has the same key as the surname of b and the total score
 *         is greater than 1.
 */
bool
namesFuzzyMatch(QString an, Composer *b)
{
    // ew!

    if (!b) return false;

    QString bn = b->name();
    if (bn == an) return true;
    if (b->aliases().contains(an)) return true;

    // -1 here means "surname is the last word", resolved once the
    // names have been split
    int aSurnameIndex = 0, bSurnameIndex = 0;
    if (an.contains(",")) {
        an.replace(",", "");
    } else {
        aSurnameIndex = -1;
    }
    if (bn.contains(",")) {
        bn.replace(",", "");
    } else {
        bSurnameIndex = -1;
    }

    // split() keeps empty parts, so both lists always have at least
    // one element and the surname indices below are always valid
    const QRegExp separators("[ -]");
    QStringList nl = an.split(separators);
    QStringList bnl = makeNameKey(bn).split(separators);

    if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
    if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;

    const QString aSurname = nl.at(aSurnameIndex);
    const bool surnameMatched =
        beginsUpperCase(aSurname) &&
        aSurname != "Della" &&
        makeNameKey(aSurname) == bnl.at(bSurnameIndex);

//    DEBUG << "bnl: " << endl;
//    for (int i = 0; i < bnl.size(); ++i) DEBUG << bnl[i] << endl;

    int matchCount = 0;
    int tested = 0;

    foreach (QString elt, nl) {

        const int score =
            (beginsUpperCase(elt) && elt != "Della") ? 2 : 1;

        QString k = makeNameKey(elt);
//        DEBUG << "Testing " << k << endl;
        if (bnl.contains(k)) {
            matchCount += score;
        }

        // give up early if neither of the first two words matched
        if (++tested == 2 && matchCount == 0) {
            return false;
        }
    }

//    DEBUG << "namesFuzzyMatch: note: surnameMatched = " << surnameMatched << endl;

    // a surname match alone is not enough: something else (or a
    // capitalised surname scoring 2) must have matched as well
    return surnameMatched && matchCount > 1;
}

/**
 * Load the composer data, list every known composer on stderr, and
 * then, for each command-line argument, print the fuzzy-match score
 * of every composer against that argument (skipping negative
 * scores), in ascending score order.
 *
 * @return 0 on success, 1 if the data could not be loaded or mapped.
 */
int
main(int argc, char **argv)
{
    QObject *root = 0;

    {
        // The store and mapper are only needed while loading; they
        // are destroyed (mapper first, then store) at the end of this
        // scope, including on the failure path.
        BasicStore store;
        store.setBaseUri("http://dbtune.org/classical/resource/");
        ObjectMapper mapper(&store);

        TypeRegistrar::addMappings(&store, &mapper);

        if (!load(&store, ":data.ntriples")) {
            std::cerr << "Failed to unpack and load resource" << std::endl;
            return 1;
        }

        std::cerr << "imported, mapping..." << std::endl;

        root = mapper.loadAllObjects(0);
    }

    if (!root) {
        std::cerr << "Failed to map objects" << std::endl;
        return 1;
    }

    QObjectList composers;

    std::cerr << "Known composers:" << std::endl;

    foreach (QObject *o, root->children()) {
        Composer *c = qobject_cast<Composer *>(o);
        if (!c) continue;
        QString sn = c->getSortName(true);
        if (sn == "") {
            std::cerr << "WARNING: Composer " << c->name().toStdString()
                      << " (URI "
                      << c->property("uri").toString().toStdString()
                      << ") has no sort-name" << std::endl;
        } else {
            std::cerr << sn.toStdString() << std::endl;
        }
        composers.push_back(c);
    }

    for (int i = 1; i < argc; ++i) {

        // NOTE(review): this relies on QString's implicit conversion
        // from char *; non-ASCII arguments may need
        // QString::fromLocal8Bit -- confirm.
        QString name = argv[i];
        std::cerr << "Name: " << name.toStdString() << std::endl;

        // keyed by score, so iteration is in ascending score order
        QMultiMap<int, QString> matches;
        foreach (QObject *o, composers) {
            Composer *c = qobject_cast<Composer *>(o);
            if (!c) continue;
            int value = c->matchFuzzyName(name);
            matches.insert(value, c->getSortName(false));
        }

        for (QMultiMap<int, QString>::const_iterator it = matches.begin();
             it != matches.end(); ++it) {
            if (it.key() < 0) continue;
            std::cerr << "Score: " << it.key() << " for name: "
                      << it.value().toStdString() << std::endl;
        }
    }

/*
    std::cerr << "mapped, storing again..." << std::endl;

    // let's try just writing out the composers

    BasicStore *outstore = new BasicStore();
    outstore->setBaseUri("http://dbtune.org/classical/resource/");
    ObjectMapper *outmapper = new ObjectMapper(outstore);

    TypeRegistrar::addMappings(outstore, outmapper);

//    outmapper->storeObjectTree(root);
    outmapper->storeAllObjects(composers);
    delete outmapper;

    std::cerr << "stored, saving..." << std::endl;

    outstore->save("test-output.ttl");

    delete outstore;
*/

    return 0;
}