Mercurial > hg > classical
changeset 35:732fb6b754fb
* More work guessing
author | Chris Cannam |
---|---|
date | Sat, 27 Mar 2010 12:30:25 +0000 |
parents | 271cbaf6e8d9 |
children | 48d8fec75afb |
files | common/Matcher.cpp utilities/track/track.cpp |
diffstat | 2 files changed, 65 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/common/Matcher.cpp Fri Mar 26 13:53:31 2010 +0000 +++ b/common/Matcher.cpp Sat Mar 27 12:30:25 2010 +0000 @@ -156,18 +156,28 @@ if (maxResults == 0 || matches.size() < maxResults) { - // Now, for slightly lower marks, test for strings like "Symphony - // no 8" at the start of the title + // Now, for slightly lower marks, test for strings like + // "Symphony no 8" at the start of the title, or after a + // colon, slash or dash (e.g. "Brahms: Symphony no 4") - QRegExp numberRe("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + QRegExp numberRe1("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + QRegExp numberRe2("[/:-]\\s*(\\w[^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + QString tag, number; - if (numberRe.indexIn(text) >= 0) { + if (numberRe1.indexIn(text) >= 0) { + tag = numberRe1.cap(1); + number = numberRe1.cap(2); + } else if (numberRe2.indexIn(text) >= 0) { + tag = numberRe2.cap(1); + number = numberRe2.cap(2); + } + + if (tag != "") { - QString tag, number; - tag = numberRe.cap(1); + std::cerr << "tag = \"" << tag.toStdString() << "\", number = \"" + << number.toStdString() << "\"" << std::endl; + tag.replace(QRegExp("[^\\w\\s]+"), ""); - number = numberRe.cap(2); - QString matcherReStr = QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(tag).arg(number); QRegExp matcherRe(matcherReStr, Qt::CaseInsensitive); @@ -258,11 +268,26 @@ int dist = ed.calculate(quoted, q, qthresh); if (dist < qthresh) { std::cerr << "quoted name match: " << q.toStdString() << std::endl; - score += 0.7f; + score += 0.7f - 0.1f * dist; } } } + if (score == 0.f) { + int ml = std::min(name.length(), text.length()); + int thresh = ml / 4; + int dist = ed.calculate(text.left(ml), name.left(ml), thresh); + if (dist < thresh) { + std::cerr << "title match: " << name.toStdString() << std::endl; + score += 0.8f - 0.05f * dist; + } + } + + //!!! how to avoid high scores for things that we should be + //!!! able to recognise as different? e.g. "Chamber Symphony + //!!! No. 2" scoring very highly as a match for "Chamber + //!!! Symphony No. 1" + if (score > 0.f) { matches.insert(Guess(score, w)); }
--- a/utilities/track/track.cpp Fri Mar 26 13:53:31 2010 +0000 +++ b/utilities/track/track.cpp Sat Mar 27 12:30:25 2010 +0000 @@ -377,15 +377,28 @@ void guessWorkFromTitle(QString title, float scale, GuessSet &guesses) { if (title == "") return; - GuessSet myGuesses; - WorkCatalogueMatcher matcher(allWorks); - GuessList gl(matcher.match(title, 0)); - if (!gl.empty()) { - foreach (Guess guess, gl) { - myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + { + GuessSet myGuesses; + WorkCatalogueMatcher matcher(allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + } } + integrateGuesses(guesses, myGuesses); } - integrateGuesses(guesses, myGuesses); + { + GuessSet myGuesses; + WorkTitleMatcher matcher(allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + } + } + integrateGuesses(guesses, myGuesses); + } } void @@ -397,6 +410,14 @@ } void +guessWorkFromFilename(QString filename, float scale, GuessSet &guesses) +{ + cerr << "guessWorkFromFilename: " << filename << endl; + QString filepart = QFileInfo(filename).fileName().replace(QRegExp("\\.[^\\.]*"), ""); + guessWorkFromTitle(filepart, scale, guesses); +} + +void guess(QString track) { cout << endl; @@ -469,10 +490,11 @@ GuessSet workGuesses; guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses); guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses); - guessWorkFromTitleTag(tags, "TALB", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "TALB", 0.4, workGuesses); guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses); guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses); - guessWorkFromTitleTag(tags, "ALBUM", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "ALBUM", 0.4, workGuesses); + guessWorkFromFilename(track, 0.4, workGuesses); cerr << "Work guesses:" << endl; foreach (Guess g, workGuesses) {