# HG changeset patch # User Chris Cannam # Date 1269693025 0 # Node ID 732fb6b754fba9ac8ab7c4147943282e6ce21359 # Parent 271cbaf6e8d9b027af3659697934ac05cb51a106 * More work guessing diff -r 271cbaf6e8d9 -r 732fb6b754fb common/Matcher.cpp --- a/common/Matcher.cpp Fri Mar 26 13:53:31 2010 +0000 +++ b/common/Matcher.cpp Sat Mar 27 12:30:25 2010 +0000 @@ -156,18 +156,28 @@ if (maxResults == 0 || matches.size() < maxResults) { - // Now, for slightly lower marks, test for strings like "Symphony - // no 8" at the start of the title + // Now, for slightly lower marks, test for strings like + // "Symphony no 8" at the start of the title, or after a + // colon, slash or dash (e.g. "Brahms: Symphony no 4") - QRegExp numberRe("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + QRegExp numberRe1("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + QRegExp numberRe2("[/:-]\\s*(\\w[^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + QString tag, number; - if (numberRe.indexIn(text) >= 0) { + if (numberRe1.indexIn(text) >= 0) { + tag = numberRe1.cap(1); + number = numberRe1.cap(2); + } else if (numberRe2.indexIn(text) >= 0) { + tag = numberRe2.cap(1); + number = numberRe2.cap(2); + } + + if (tag != "") { - QString tag, number; - tag = numberRe.cap(1); + std::cerr << "tag = \"" << tag.toStdString() << "\", number = \"" + << number.toStdString() << "\"" << std::endl; + tag.replace(QRegExp("[^\\w\\s]+"), ""); - number = numberRe.cap(2); - QString matcherReStr = QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(tag).arg(number); QRegExp matcherRe(matcherReStr, Qt::CaseInsensitive); @@ -258,11 +268,26 @@ int dist = ed.calculate(quoted, q, qthresh); if (dist < qthresh) { std::cerr << "quoted name match: " << q.toStdString() << std::endl; - score += 0.7f; + score += 0.7f - 0.1f * dist; } } } + if (score == 0.f) { + int ml = std::min(name.length(), text.length()); + int thresh = ml / 4; + int dist = ed.calculate(text.left(ml), name.left(ml), thresh); + if (dist < thresh) { + std::cerr << "title match: " << name.toStdString() << std::endl; + score += 0.8f - 0.05f * dist; + } + } + + //!!! how to avoid high scores for things that we should be + //!!! able to recognise as different? e.g. "Chamber Symphony + //!!! No. 2" scoring very highly as a match for "Chamber + //!!! Symphony No. 1" + if (score > 0.f) { matches.insert(Guess(score, w)); } diff -r 271cbaf6e8d9 -r 732fb6b754fb utilities/track/track.cpp --- a/utilities/track/track.cpp Fri Mar 26 13:53:31 2010 +0000 +++ b/utilities/track/track.cpp Sat Mar 27 12:30:25 2010 +0000 @@ -377,15 +377,28 @@ void guessWorkFromTitle(QString title, float scale, GuessSet &guesses) { if (title == "") return; - GuessSet myGuesses; - WorkCatalogueMatcher matcher(allWorks); - GuessList gl(matcher.match(title, 0)); - if (!gl.empty()) { - foreach (Guess guess, gl) { - myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + { + GuessSet myGuesses; + WorkCatalogueMatcher matcher(allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + } } + integrateGuesses(guesses, myGuesses); } - integrateGuesses(guesses, myGuesses); + { + GuessSet myGuesses; + WorkTitleMatcher matcher(allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + } + } + integrateGuesses(guesses, myGuesses); + } } void @@ -397,6 +410,14 @@ } void +guessWorkFromFilename(QString filename, float scale, GuessSet &guesses) +{ + cerr << "guessWorkFromFilename: " << filename << endl; + QString filepart = QFileInfo(filename).fileName().replace(QRegExp("\\.[^\\.]*"), ""); + guessWorkFromTitle(filepart, scale, guesses); +} + +void guess(QString track) { cout << endl; @@ -469,10 +490,11 @@ GuessSet workGuesses; guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses); guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses); - guessWorkFromTitleTag(tags, "TALB", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "TALB", 0.4, workGuesses); guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses); guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses); - guessWorkFromTitleTag(tags, "ALBUM", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "ALBUM", 0.4, workGuesses); + guessWorkFromFilename(track, 0.4, workGuesses); cerr << "Work guesses:" << endl; foreach (Guess g, workGuesses) {