Mercurial > hg > classical
view common/Matcher.cpp @ 34:271cbaf6e8d9
* First bits of works matching
author | Chris Cannam |
---|---|
date | Fri, 26 Mar 2010 13:53:31 +0000 |
parents | 84d6acb6b3ba |
children | 732fb6b754fb |
line wrap: on
line source
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

#include "Matcher.h"

#include "Objects.h"
#include "EditDistance.h"

#include <QMultiMap>

#include <iostream>

using namespace Dataquay;

namespace ClassicalData {

/**
 * Return true if a positive result limit is in force and the set of
 * candidate matches has already reached it.  A maxResults of zero or
 * less is treated as "no limit".
 */
static bool
guessLimitReached(const GuessSet &matches, int maxResults)
{
    // Explicit int conversion avoids a signed/unsigned comparison
    // whichever container type GuessSet happens to be.
    return maxResults > 0 && int(matches.size()) >= maxResults;
}

/**
 * Copy guesses from the given set into a list, in the set's iteration
 * order, stopping once maxResults entries have been copied.  A
 * maxResults of zero or less copies everything.
 *
 * The limit is tested before each push so that at most maxResults
 * entries are returned (the previous per-matcher loops tested it after
 * pushing and so returned one entry too many).
 */
static GuessList
guessesFromSet(const GuessSet &matches, int maxResults)
{
    GuessList results;
    int count = 0;
    for (GuessSet::const_iterator i = matches.begin();
         i != matches.end(); ++i) {
        if (maxResults > 0 && count >= maxResults) break;
        results.push_back(*i);
        ++count;
    }
    return results;
}

ComposerTypingQuickMatcher::ComposerTypingQuickMatcher(QList<Composer *> cl) :
    m_composers(cl)
{
}

/**
 * Score every composer against the text using
 * Composer::matchTypingQuick, discard those scoring below threshold,
 * and return at most maxResults of the remaining guesses (all of them
 * if maxResults is zero or less).
 */
GuessList
ComposerTypingQuickMatcher::match(QString text, int maxResults,
                                  float threshold) const
{
    GuessSet matches;

    foreach (Composer *c, m_composers) {
        float value = c->matchTypingQuick(text);
        if (value < threshold) continue;
        matches.insert(Guess(value, c));
    }

    return guessesFromSet(matches, maxResults);
}

ComposerTypingThoroughMatcher::ComposerTypingThoroughMatcher(QList<Composer *> cl) :
    m_composers(cl)
{
}

/**
 * Score every composer against the text using Composer::matchTyping,
 * discard those scoring below threshold, and return at most
 * maxResults of the remaining guesses (all of them if maxResults is
 * zero or less).
 */
GuessList
ComposerTypingThoroughMatcher::match(QString text, int maxResults,
                                     float threshold) const
{
    GuessSet matches;

    foreach (Composer *c, m_composers) {
        float value = c->matchTyping(text);
        if (value < threshold) continue;
        matches.insert(Guess(value, c));
    }

    return guessesFromSet(matches, maxResults);
}

ComposerFullTextMatcher::ComposerFullTextMatcher(QList<Composer *> cl) :
    m_composers(cl)
{
}

/**
 * Score every composer against the text using
 * Composer::matchFuzzyName, discard those scoring below threshold,
 * and return at most maxResults of the remaining guesses (all of them
 * if maxResults is zero or less).
 */
GuessList
ComposerFullTextMatcher::match(QString text, int maxResults,
                               float threshold) const
{
    GuessSet matches;

    foreach (Composer *c, m_composers) {
        float value = c->matchFuzzyName(text);
        if (value < threshold) continue;
//        std::cerr << "Liking: " << c->name().toStdString() << " (" << value << ")" << std::endl;
        matches.insert(Guess(value, c));
    }

    return guessesFromSet(matches, maxResults);
}

WorkCatalogueMatcher::WorkCatalogueMatcher(QList<Work *> wl) :
    m_works(wl)
{
}

/**
 * Match works against catalogue-style references found in the text.
 *
 * First, each catalogue number text extracted from the input (via
 * Work::extractCatalogueNumberTexts) is compared with every work's
 * catalogue entry and with an "Op N" / "Op N no M" string built from
 * its opus and number; any hit scores 1.0.  If nothing can be
 * extracted, an empty list is returned immediately.
 *
 * Then, if the result limit has not been reached, text beginning with
 * something like "Symphony no 8" is turned into a case-insensitive
 * pattern and tested against the start of each work's name; hits
 * score 0.8.
 *
 * At most maxResults guesses are returned (no limit if maxResults is
 * zero or less).
 *
 * NOTE(review): threshold is not consulted by this matcher -- confirm
 * that is intended.
 */
GuessList
WorkCatalogueMatcher::match(QString text, int maxResults,
                            float threshold) const
{
    GuessSet matches;

    QStringList cats = Work::extractCatalogueNumberTexts(text);
    if (cats.empty()) return GuessList();

    foreach (QString cat, cats) {

        std::cerr << "testing cat \"" << cat.toStdString() << "\" against "
                  << m_works.size() << " works" << std::endl;

        foreach (Work *w, m_works) {

            // This only leaves the inner loop; remaining catalogue
            // texts are still announced above but add nothing further.
            if (guessLimitReached(matches, maxResults)) break;

            // compareCatalogueNumberTexts is used like a comparator
            // here: a zero/false result is taken to mean "same".
            QString catalogue = w->catalogue();
            if (catalogue != "" &&
                !Work::compareCatalogueNumberTexts(catalogue, cat)) {
                std::cerr << "We like: " << w->name().toStdString() << " ("
                          << catalogue.toStdString() << ")" << std::endl;
                // all catalogue matches score equal here
                matches.insert(Guess(1.f, w));
                continue;
            }

            QString opus = w->opus();
            QString number = w->number();
            if (opus == "") continue;

            // Build the opus text with or without the "no" part and
            // compare it in the same way as the catalogue entry.
            QString optext;
            if (number != "") {
                optext = QString("Op %1 no %2").arg(opus).arg(number);
            } else {
                optext = QString("Op %1").arg(opus);
            }

            if (!Work::compareCatalogueNumberTexts(optext, cat)) {
                std::cerr << "We like: " << w->name().toStdString() << " ("
                          << optext.toStdString() << ")" << std::endl;
                matches.insert(Guess(1.f, w));
            }
        }
    }

    if (!guessLimitReached(matches, maxResults)) {

        // Now, for slightly lower marks, test for strings like "Symphony
        // no 8" at the start of the title

        QRegExp numberRe("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)");

        if (numberRe.indexIn(text) >= 0) {

            // Strip anything that is neither a word character nor
            // whitespace from the tag before it is spliced into the
            // pattern below.
            QString tag = numberRe.cap(1);
            tag.replace(QRegExp("[^\\w\\s]+"), "");
            QString number = numberRe.cap(2);

            QString matcherReStr =
                QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(tag).arg(number);
            QRegExp matcherRe(matcherReStr, Qt::CaseInsensitive);

            std::cerr << "matcherRe: " << matcherReStr.toStdString() << std::endl;

            // An "initials only" weak matcher (each word of the tag
            // reduced to its first letter plus \w*, scoring 0.2) was
            // tried here and dropped: it didn't work well.

            foreach (Work *w, m_works) {

                if (guessLimitReached(matches, maxResults)) break;

                QString name = w->name();
                if (matcherRe.indexIn(name) >= 0) {
                    std::cerr << "We quite like: " << name.toStdString() << std::endl;
                    matches.insert(Guess(0.8f, w));
                }
            }
        }
    }

    return guessesFromSet(matches, maxResults);
}

WorkTitleMatcher::WorkTitleMatcher(QList<Work *> wl) :
    m_works(wl)
{
}

/**
 * Match works by a quoted nickname in the text.
 *
 * If the text contains a quoted string of at least four characters,
 * each work whose name also contains a quoted string is compared with
 * it by edit distance; works whose distance is below a quarter of the
 * quoted string's length score 0.7.  Works with no score are not
 * returned.  At most maxResults guesses are returned (no limit if
 * maxResults is zero or less).
 *
 * NOTE(review): threshold is not consulted by this matcher -- confirm
 * that is intended.
 */
GuessList
WorkTitleMatcher::match(QString text, int maxResults,
                        float threshold) const
{
    GuessSet matches;

    // NOTE(review): the \b anchors sit outside the quote characters,
    // so a quote preceded by whitespace (or at the very start of the
    // string) does not appear to satisfy the opening \b -- confirm
    // this pattern does what is wanted.
    QRegExp quoteRe("\\b[\"']([^\"]+)[\"']\\b");

    QString quoted;
    int qthresh = 0;

    if (quoteRe.indexIn(text) >= 0) {
        quoted = quoteRe.cap(1);
        // Very short quoted strings are ignored altogether
        if (quoted.length() < 4) quoted = "";
        qthresh = quoted.length() / 4;
    }

    std::cerr << "text = " << text.toStdString() << ", quoted = "
              << quoted.toStdString() << std::endl;

    EditDistance ed;

    foreach (Work *w, m_works) {

        if (guessLimitReached(matches, maxResults)) break;

        float score = 0.f;
        QString name = w->name();

        // quoteRe is reused against the work name; the capture from
        // the input text has already been copied into "quoted".
        if (quoted != "" && quoteRe.indexIn(name) >= 0) {
            QString q = quoteRe.cap(1);
            int dist = ed.calculate(quoted, q, qthresh);
            if (dist < qthresh) {
                std::cerr << "quoted name match: " << q.toStdString() << std::endl;
                score += 0.7f;
            }
        }

        if (score > 0.f) {
            matches.insert(Guess(score, w));
        }
    }

    return guessesFromSet(matches, maxResults);
}

}