view testapp/Loader.cpp @ 10:d35e5d769c87 classical-rdf

* some experiments with composer name matching
author Chris Cannam
date Wed, 17 Feb 2010 19:26:48 +0000
parents 9e2b203254ab
children 98047b91b09d
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */

#include "Objects.h"
#include "TypeRegistrar.h"

#include <dataquay/BasicStore.h>
#include <dataquay/objectmapper/ObjectMapper.h>
#include <dataquay/Debug.h>

#include <QTemporaryFile>
#include <QMultiMap>

#include <iostream>

using namespace Dataquay;
using namespace ClassicalData;

/**
 * Copy the contents of the file or resource named resourceName into
 * a temporary file on disk, then import that temporary file into
 * the given store as N-Triples.
 *
 * The copy is made so that the store can be handed a file:// URL
 * (presumably the importer cannot read the original resource path
 * directly -- confirm against the caller's use of ":"-prefixed
 * names).
 *
 * Returns false if the temporary file cannot be created, the
 * resource cannot be opened or read, or the copy cannot be written
 * out completely; returns true once the import call has been made.
 * The result of the import itself is not inspected here.
 */
bool
load(BasicStore *store, QString resourceName)
{
    QTemporaryFile tf;
    if (!tf.open()) return false;
    tf.setAutoRemove(true);

    QFile f(resourceName);
    if (!f.open(QFile::ReadOnly)) return false;

    const int bufsiz = 10240;
    while (!f.atEnd()) {
        QByteArray buffer = f.read(bufsiz);
        // An empty read before end-of-file means a read error;
        // bail out rather than spinning forever.
        if (buffer.isEmpty()) return false;
        if (tf.write(buffer) != buffer.size()) return false;
    }

    // The importer reopens the file by name, so everything we wrote
    // must actually have reached the disk before it is called.
    if (!tf.flush()) return false;

    std::cerr << "unpacked, importing..." << std::endl;
    store->import("file://" + tf.fileName(),
                  BasicStore::ImportPermitDuplicates, // fastest mode
                  "ntriples");
    return true;
}

//!!! both nasty and duplicated from Import.cpp

/**
 * Reduce a name to a crude lower-case key for fuzzy comparison, so
 * that names differing only in certain spelling conventions yield
 * the same key.
 *
 * The substitutions are applied one after another, each operating
 * on the output of the previous one, so their order is significant
 * (for example "x" becomes "ks" and is then turned into "cs" by the
 * later "k" rule).
 */
QString makeNameKey(QString name)
{
    static const struct {
        const char *from;
        const char *to;
    } rules[] = {
        { "'",   ""   },
        { "x",   "ks" },
        { "y",   "i"  },
        { "k",   "c"  },
        { "ch",  "c"  },
        { "cc",  "c"  },
        { "v",   "f"  },
        { "ff",  "f"  },
        { "th",  "t"  },
        { "tch", "ch" },
        { "er",  "r"  }
    };
    const int ruleCount = int(sizeof(rules) / sizeof(rules[0]));

    QString key = name.toLower();
    for (int r = 0; r < ruleCount; ++r) {
        key.replace(QString(rules[r].from), QString(rules[r].to));
    }
//    DEBUG << "makeNameKey(" << name << "): " << key << endl;
    return key;
}

/**
 * Heuristically decide whether the free-text name an refers to the
 * composer b.
 *
 * An exact match against the composer's name or any of its aliases
 * succeeds immediately.  Otherwise both names are split into tokens
 * on spaces and hyphens and compared via makeNameKey():
 *
 *  - If a name contains a comma, the comma is removed and its first
 *    token is taken as the surname; otherwise the last token is.
 *  - The surname of an must start with an upper-case letter, must
 *    not be "Della", and must have the same key as the surname
 *    token of b.
 *  - Every token of an whose key appears anywhere among b's tokens
 *    adds to a score: 2 for a capitalised token other than "Della",
 *    1 otherwise.
 *  - If neither of the first two tokens of an matched anything, the
 *    match is rejected straight away.
 *
 * Returns true only if the surnames matched and the total score is
 * greater than 1.  Empty tokens (from doubled, leading or trailing
 * separators) are ignored; a name with no tokens never matches.
 * Returns false if b is null.
 */
bool namesFuzzyMatch(QString an, Composer *b)
{
    // ew!

    if (!b) return false;

    QString bn = b->name();
    if (bn == an) return true;
    if (b->aliases().contains(an)) return true;

    // A comma marks the "Surname, Forenames" form
    const bool aSurnameFirst = an.contains(",");
    if (aSurnameFirst) an.replace(",", "");
    const bool bSurnameFirst = bn.contains(",");
    if (bSurnameFirst) bn.replace(",", "");

    // Skip empty parts: an empty token must neither be indexed with
    // [0], nor count towards the "first two tokens" test below, nor
    // match an empty token on the other side.
    const QRegExp separator("[ -]");
    QStringList nl = an.split(separator, QString::SkipEmptyParts);
    QStringList bnl =
        makeNameKey(bn).split(separator, QString::SkipEmptyParts);
    if (nl.isEmpty() || bnl.isEmpty()) return false;

    const int aSurnameIndex = aSurnameFirst ? 0 : nl.size() - 1;
    const int bSurnameIndex = bSurnameFirst ? 0 : bnl.size() - 1;

    const QString aSurname = nl[aSurnameIndex];
    const bool surnameMatched =
        aSurname[0].isUpper() &&
        aSurname != "Della" &&
        makeNameKey(aSurname) == bnl[bSurnameIndex];

    int matchCount = 0;
    int tested = 0;
    foreach (const QString &elt, nl) {
        const int score = (elt[0].isUpper() && elt != "Della") ? 2 : 1;
        if (bnl.contains(makeNameKey(elt))) {
            matchCount += score;
        }
        // Give up early if nothing in the first two tokens matched
        if (++tested == 2 && matchCount == 0) {
            return false;
        }
    }

    return surnameMatched && matchCount > 1;
}

int main(int argc, char **argv)
{
    BasicStore *store = new BasicStore();
    store->setBaseUri("http://dbtune.org/classical/resource/");
    ObjectMapper *mapper = new ObjectMapper(store);

    TypeRegistrar::addMappings(store, mapper);

    if (!load(store, ":data.ntriples")) {
	std::cerr << "Failed to unpack and load resource" << std::endl;
	return 1;
    }

    std::cerr << "imported, mapping..." << std::endl;

    QObject *root = mapper->loadAllObjects(0);

    delete mapper;
    delete store;
    
    QObjectList composers;
    std::cerr << "Known composers:" << std::endl;
    foreach (QObject *o, root->children()) {
        Composer *c = qobject_cast<Composer *>(o);
        if (c) {
            QString sn = c->getSortName(true);
            if (sn == "") {
                std::cerr << "WARNING: Composer " << c->name().toStdString() << " (URI " << c->property("uri").toString().toStdString() << ") has no sort-name" << std::endl;
            } else {
                std::cerr << sn.toStdString() << std::endl;
            }
            composers.push_back(c);
        }
    }

    for (int i = 1; i < argc; ++i) {
	QString name = argv[i];
	std::cerr << "Name: " << name.toStdString() << std::endl;
        QMultiMap<int, QString> matches;
	foreach (QObject *o, composers) {
	    Composer *c = qobject_cast<Composer *>(o);
	    if (!c) continue;
            int value = c->matchFuzzyName(name);
            matches.insert(value, c->getSortName(false));
	}
        for (QMultiMap<int, QString>::const_iterator i = matches.begin();
             i != matches.end(); ++i) {
            if (i.key() < 0) continue;
            std::cerr << "Score: " << i.key() << " for name: " << i.value().toStdString() << std::endl;
        }
    }

/*
    std::cerr << "mapped, storing again..." << std::endl;

    // let's try just writing out the composers

    BasicStore *outstore = new BasicStore();
    outstore->setBaseUri("http://dbtune.org/classical/resource/");
    ObjectMapper *outmapper = new ObjectMapper(outstore);

    TypeRegistrar::addMappings(outstore, outmapper);

//    outmapper->storeObjectTree(root);
    outmapper->storeAllObjects(composers);
    delete outmapper;

    std::cerr << "stored, saving..." << std::endl;

    outstore->save("test-output.ttl");

    delete outstore;
*/
}