annotate testapp/Loader.cpp @ 10:d35e5d769c87 classical-rdf

* some experiments with composer name matching
author Chris Cannam
date Wed, 17 Feb 2010 19:26:48 +0000
parents 9e2b203254ab
children 98047b91b09d
rev   line source
Chris@10 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@7 2
Chris@7 3 #include "Objects.h"
Chris@7 4 #include "TypeRegistrar.h"
Chris@7 5
Chris@7 6 #include <dataquay/BasicStore.h>
Chris@7 7 #include <dataquay/objectmapper/ObjectMapper.h>
Chris@10 8 #include <dataquay/Debug.h>
Chris@7 9
Chris@7 10 #include <QTemporaryFile>
Chris@10 11 #include <QMultiMap>
Chris@7 12
Chris@7 13 #include <iostream>
Chris@7 14
Chris@7 15 using namespace Dataquay;
Chris@7 16 using namespace ClassicalData;
Chris@7 17
Chris@7 18 bool
Chris@7 19 load(BasicStore *store, QString resourceName)
Chris@7 20 {
Chris@7 21 QTemporaryFile tf;
Chris@7 22 if (!tf.open()) return false;
Chris@7 23 tf.setAutoRemove(true);
Chris@7 24 QFile f(resourceName);
Chris@7 25 if (!f.open(QFile::ReadOnly)) return false;
Chris@7 26 QByteArray buffer;
Chris@7 27 int bufsiz = 10240;
Chris@7 28 while (!f.atEnd()) {
Chris@7 29 buffer = f.read(bufsiz);
Chris@7 30 tf.write(buffer);
Chris@7 31 }
Chris@7 32 std::cerr << "unpacked, importing..." << std::endl;
Chris@7 33 store->import("file://" + tf.fileName(),
Chris@7 34 BasicStore::ImportPermitDuplicates, // fastest mode
Chris@7 35 "ntriples");
Chris@7 36 return true;
Chris@7 37 }
Chris@7 38
Chris@9 39 //!!! both nasty and duplicated from Import.cpp
Chris@9 40
Chris@9 41 QString makeNameKey(QString name)
Chris@9 42 {
Chris@9 43 QString key = name.toLower()
Chris@9 44 .replace("'", "")
Chris@9 45 .replace("x", "ks")
Chris@9 46 .replace("y", "i")
Chris@9 47 .replace("k", "c")
Chris@9 48 .replace("ch", "c")
Chris@9 49 .replace("cc", "c")
Chris@9 50 .replace("v", "f")
Chris@9 51 .replace("ff", "f")
Chris@9 52 .replace("th", "t")
Chris@9 53 .replace("tch", "ch")
Chris@9 54 .replace("er", "r");
Chris@9 55 // DEBUG << "makeNameKey(" << name << "): " << key << endl;
Chris@9 56 return key;
Chris@9 57 }
Chris@9 58
Chris@9 59 bool namesFuzzyMatch(QString an, Composer *b)
Chris@9 60 {
Chris@9 61 // ew!
Chris@9 62
Chris@9 63 QString bn = b->name();
Chris@9 64 if (bn == an) return true;
Chris@9 65 if (b->aliases().contains(an)) return true;
Chris@9 66 int aSurnameIndex = 0, bSurnameIndex = 0;
Chris@9 67 if (an.contains(",")) {
Chris@9 68 an.replace(",", "");
Chris@9 69 } else {
Chris@9 70 aSurnameIndex = -1;
Chris@9 71 }
Chris@9 72 if (bn.contains(",")) {
Chris@9 73 bn.replace(",", "");
Chris@9 74 } else {
Chris@9 75 bSurnameIndex = -1;
Chris@9 76 }
Chris@9 77 QStringList nl = an.split(QRegExp("[ -]"));
Chris@9 78 QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]"));
Chris@9 79 int matchCount = 0;
Chris@9 80 QString surnameMatch = "";
Chris@9 81 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
Chris@9 82 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
Chris@9 83 if (nl[aSurnameIndex][0].isUpper() &&
Chris@9 84 nl[aSurnameIndex] != "Della" &&
Chris@9 85 makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
Chris@9 86 surnameMatch = nl[aSurnameIndex];
Chris@9 87 }
Chris@10 88 // DEBUG << "bnl: " << endl;
Chris@10 89 // for (int i = 0; i < bnl.size(); ++i) DEBUG << bnl[i] << endl;
Chris@9 90 int tested = 0;
Chris@9 91 foreach (QString elt, nl) {
Chris@10 92 int score = 2;
Chris@10 93 if (!elt[0].isUpper() || elt == "Della") score = 1;
Chris@9 94 QString k = makeNameKey(elt);
Chris@10 95 // DEBUG << "Testing " << k << endl;
Chris@9 96 if (bnl.contains(k)) {
Chris@10 97 matchCount += score;
Chris@9 98 }
Chris@9 99 if (++tested == 2 && matchCount == 0) {
Chris@9 100 return false;
Chris@9 101 }
Chris@9 102 }
Chris@9 103 if (surnameMatch != "") {
Chris@9 104 // DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
Chris@9 105 if (matchCount > 1) {
Chris@9 106 return true;
Chris@9 107 } else {
Chris@9 108 // DEBUG << "(but not enough else matched)" << endl;
Chris@9 109 return false;
Chris@9 110 }
Chris@9 111 }
Chris@9 112 return false;
Chris@9 113 }
Chris@9 114
Chris@7 115 int main(int argc, char **argv)
Chris@7 116 {
Chris@7 117 BasicStore *store = new BasicStore();
Chris@7 118 store->setBaseUri("http://dbtune.org/classical/resource/");
Chris@7 119 ObjectMapper *mapper = new ObjectMapper(store);
Chris@7 120
Chris@7 121 TypeRegistrar::addMappings(store, mapper);
Chris@7 122
Chris@7 123 if (!load(store, ":data.ntriples")) {
Chris@7 124 std::cerr << "Failed to unpack and load resource" << std::endl;
Chris@7 125 return 1;
Chris@7 126 }
Chris@7 127
Chris@7 128 std::cerr << "imported, mapping..." << std::endl;
Chris@7 129
Chris@8 130 QObject *root = mapper->loadAllObjects(0);
Chris@8 131
Chris@7 132 delete mapper;
Chris@7 133 delete store;
Chris@9 134
Chris@9 135 QObjectList composers;
Chris@10 136 std::cerr << "Known composers:" << std::endl;
Chris@9 137 foreach (QObject *o, root->children()) {
Chris@10 138 Composer *c = qobject_cast<Composer *>(o);
Chris@10 139 if (c) {
Chris@10 140 QString sn = c->getSortName(true);
Chris@10 141 if (sn == "") {
Chris@10 142 std::cerr << "WARNING: Composer " << c->name().toStdString() << " (URI " << c->property("uri").toString().toStdString() << ") has no sort-name" << std::endl;
Chris@10 143 } else {
Chris@10 144 std::cerr << sn.toStdString() << std::endl;
Chris@10 145 }
Chris@10 146 composers.push_back(c);
Chris@10 147 }
Chris@9 148 }
Chris@10 149
Chris@10 150 for (int i = 1; i < argc; ++i) {
Chris@10 151 QString name = argv[i];
Chris@9 152 std::cerr << "Name: " << name.toStdString() << std::endl;
Chris@10 153 QMultiMap<int, QString> matches;
Chris@9 154 foreach (QObject *o, composers) {
Chris@9 155 Composer *c = qobject_cast<Composer *>(o);
Chris@9 156 if (!c) continue;
Chris@10 157 int value = c->matchFuzzyName(name);
Chris@10 158 matches.insert(value, c->getSortName(false));
Chris@9 159 }
Chris@10 160 for (QMultiMap<int, QString>::const_iterator i = matches.begin();
Chris@10 161 i != matches.end(); ++i) {
Chris@10 162 if (i.key() < 0) continue;
Chris@10 163 std::cerr << "Score: " << i.key() << " for name: " << i.value().toStdString() << std::endl;
Chris@10 164 }
Chris@9 165 }
Chris@8 166
Chris@9 167 /*
Chris@8 168 std::cerr << "mapped, storing again..." << std::endl;
Chris@8 169
Chris@9 170 // let's try just writing out the composers
Chris@9 171
Chris@8 172 BasicStore *outstore = new BasicStore();
Chris@8 173 outstore->setBaseUri("http://dbtune.org/classical/resource/");
Chris@8 174 ObjectMapper *outmapper = new ObjectMapper(outstore);
Chris@8 175
Chris@8 176 TypeRegistrar::addMappings(outstore, outmapper);
Chris@8 177
Chris@9 178 // outmapper->storeObjectTree(root);
Chris@9 179 outmapper->storeAllObjects(composers);
Chris@8 180 delete outmapper;
Chris@8 181
Chris@8 182 std::cerr << "stored, saving..." << std::endl;
Chris@8 183
Chris@8 184 outstore->save("test-output.ttl");
Chris@8 185
Chris@8 186 delete outstore;
Chris@9 187 */
Chris@7 188 }
Chris@7 189