Mercurial > hg > classical
changeset 19:559a001e1bf5 classical-rdf
* greatly improve matchTyping function for Composer
author | Chris Cannam |
---|---|
date | Mon, 22 Feb 2010 18:09:55 +0000 |
parents | c8ef23d3888c |
children | c4cb65c436ef |
files | common/Objects.cpp common/Objects.h |
diffstat | 2 files changed, 56 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/common/Objects.cpp Mon Feb 22 14:18:30 2010 +0000 +++ b/common/Objects.cpp Mon Feb 22 18:09:55 2010 +0000 @@ -470,43 +470,72 @@ } float -Composer::matchTyping(QString text) const +Composer::matchTyping(QString t) const { - //!!! not good - - if (text == "") return 0; + if (t == "") return 0; cacheNames(); - float fameBonus = m_pages.size() / 10.f; + float fameBonus = m_pages.size() / 40.f; - EditDistance ed(EditDistance::RestrictedTransposition, 1, 1, true); + QString n = name().replace(",", "").toLower(); + t = t.replace(",", "").toLower(); - QString n = name().replace(",", "").toLower(); - text = text.replace(",", "").toLower(); - - int nl = n.length(); - int tl = text.length(); - + if (n == t) return 1.f + fameBonus; + if (n.startsWith(t)) return 0.8f + fameBonus; + float score = 0.f; - if (n == text) score += 1.f; - else if (n.startsWith(text)) score += 0.8f; - else if (nl >= tl) { - if (tl > 3 && n.contains(text)) { - score += 0.3; + static QRegExp sre("[\\., -]+"); + QStringList nl = n.split(sre, QString::SkipEmptyParts); + QStringList tl = t.split(sre, QString::SkipEmptyParts); + if (nl.empty() || tl.empty()) return 0.f; + + int unmatched = 0; + for (int i = 0; i < tl.size(); ++i) { + int ni = 0; + for (ni = 0; ni < nl.size(); ++ni) { + if (tl[i] == nl[ni]) { + if (tl[i].length() > 1) { + score += 0.2; + } else { + score += 0.1; + } + } else if (nl[ni].startsWith(tl[i])) { + score += 0.1; + } else if (nl[ni].startsWith(tl[i][0])) { + score += 0.03; + } else { + continue; + } + break; } - int dist = ed.calculate(n.left(text.length()), text, nl/4); - if (dist <= nl/4) { - score += 0.6 - ((0.5 * dist) / (nl/4)); + if (ni == nl.size()+1) { + ++unmatched; } - } else { - int dist = ed.calculate(n, text, nl/4); - if (dist < nl/4 && dist < 3) score += 0.3; + } + if (nl[0] == tl[0]) { + score += 0.2; + } + if (score > unmatched * 0.1) { + score -= unmatched * 0.1; + } else if (score > 0.1) { + score = 0.1; } - if (text.contains(" ") && matchFuzzyName(text) >= 1.f) score += 0.9; + float fuzzyScore = matchFuzzyName(t); + if (t.contains(" ") && fuzzyScore >= 1.f) score += 0.4; + + if (score == 0.f) { + EditDistance ed(EditDistance::RestrictedTransposition, 1, 1, true); + if (nl.length() > tl.length()) { + int dist = calculateThresholdedDistance(ed, t, n.left(t.length())); + if (dist >= 0 && dist < 3) score += (3 - dist) * 0.05; + } else { + score += fuzzyScore / 10.f; + } + } + if (score > 0.f) score += fameBonus; - return score; }
--- a/common/Objects.h Mon Feb 22 14:18:30 2010 +0000 +++ b/common/Objects.h Mon Feb 22 18:09:55 2010 +0000 @@ -3,6 +3,8 @@ #ifndef _CLASSICAL_DATA_OBJECTS_H_ #define _CLASSICAL_DATA_OBJECTS_H_ +#include <dataquay/Uri.h> + #include <QObject> #include <QMetaType> #include <QString> @@ -13,8 +15,6 @@ #include <QMutexLocker> #include <QMap> -#include <dataquay/Uri.h> - namespace ClassicalData { class HistoricalEvent : public QObject