Mercurial > hg > classical
changeset 16:cb315ba61e03 classical-rdf
* Introduce a not very good typing matcher
author | Chris Cannam |
---|---|
date | Fri, 19 Feb 2010 17:40:17 +0000 |
parents | 701702f8959a |
children | 06fcbfe2a6ed |
files | common/EditDistance.h common/Objects.cpp common/Objects.h testapp/Loader.cpp |
diffstat | 4 files changed, 60 insertions(+), 6 deletions(-) |
line wrap: on
line diff
--- a/common/EditDistance.h Fri Feb 19 14:53:22 2010 +0000 +++ b/common/EditDistance.h Fri Feb 19 17:40:17 2010 +0000 @@ -17,8 +17,8 @@ }; EditDistance(TranspositionMode tm = RestrictedTransposition, - int editPenalty = 1, - int suffixPenalty = 1, + int editPenalty = 1, //!!! probably better to lose this + int suffixPenalty = 1, //!!! probably better to lose this bool normalise = true) : m_transpositionMode(tm), m_editPenalty(editPenalty),
--- a/common/Objects.cpp Fri Feb 19 14:53:22 2010 +0000 +++ b/common/Objects.cpp Fri Feb 19 17:40:17 2010 +0000 @@ -414,6 +414,8 @@ QString reduced = reduceName(elt); + //!!! these don't seem to match often... + if (m_reducedSurnameElements.contains(reduced)) { score += 10; haveSurname = true; @@ -473,6 +475,47 @@ return fscore; } +float +Composer::matchTyping(QString text) const +{ + //!!! not good + + if (text == "") return 0; + + cacheNames(); + float fameBonus = m_pages.size() / 10.f; + + EditDistance ed(EditDistance::RestrictedTransposition, 1, 1, true); + + QString n = name().replace(",", "").toLower(); + text = text.replace(",", "").toLower(); + + int nl = n.length(); + int tl = text.length(); + + float score = 0.f; + + if (n == text) score += 1.f; + else if (n.startsWith(text)) score += 0.8f; + else if (nl >= tl) { + if (tl > 3 && n.contains(text)) { + score += 0.3; + } + int dist = ed.calculate(n.left(text.length()), text, nl/4); + if (dist <= nl/4) { + score += 0.6 - ((0.5 * dist) / (nl/4)); + } + } else { + int dist = ed.calculate(n, text, nl/4); + if (dist < nl/4 && dist < 3) score += 0.3; + } + + if (text.contains(" ") && matchFuzzyName(text) >= 1.f) score += 0.9; + if (score > 0.f) score += fameBonus; + + return score; +} + static int compare(QString a, QString b) {
--- a/common/Objects.h Fri Feb 19 14:53:22 2010 +0000 +++ b/common/Objects.h Fri Feb 19 17:40:17 2010 +0000 @@ -355,7 +355,8 @@ * composer name with unpredictable formatting and spelling (and * probably incomplete), return an estimate for the likelihood * that the intended composer was this one. Higher return values - * indicate greater confidence. + * indicate greater confidence; a value of 1.0 or more indicates + * that all of the input is at least close to perfectly matched. */ float matchFuzzyName(QString name) const; @@ -364,12 +365,21 @@ * composer name with unpredictable formatting and spelling (and * probably incomplete), return an estimate for the likelihood * that the intended composer was this one. Higher return values - * indicate greater confidence. The supplied name should have - * been lower-cased and split on non-alphabetical characters. + * indicate greater confidence; a value of 1.0 or more indicates + * that all of the input is at least close to perfectly matched. + * The supplied name should have been lower-cased and split on + * non-alphabetical characters. */ float matchFuzzyName(QStringList name) const; /** + * Given a string that is in the process of being typed by the + * user, return an estimate of the likelihood that the text is + * intended to become this composer's name. + */ + float matchTyping(QString text) const; + + /** * Return the supplied name reduced into a "simplified" form, * eliminating many of the differences often found particularly in * European language names that have been anglicised. Used in
--- a/testapp/Loader.cpp Fri Feb 19 14:53:22 2010 +0000 +++ b/testapp/Loader.cpp Fri Feb 19 17:40:17 2010 +0000 @@ -105,7 +105,8 @@ foreach (QObject *o, composers) { Composer *c = qobject_cast<Composer *>(o); if (!c) continue; - float value = c->matchFuzzyName(elements); +// float value = c->matchFuzzyName(elements); + float value = c->matchTyping(QString::fromStdString(s)); matches.insert(value, c->getSortName(false)); } int n = 0;