Mercurial > hg > classical
changeset 37:a8ab8c08a668
* More drudgery on track id
author | Chris Cannam |
---|---|
date | Tue, 30 Mar 2010 18:12:35 +0100 |
parents | 48d8fec75afb |
children | ac956912fcdf |
files | common/Matcher.cpp common/Objects.cpp common/Objects.h utilities/track/track.cpp |
diffstat | 4 files changed, 165 insertions(+), 69 deletions(-) [+] |
line wrap: on
line diff
--- a/common/Matcher.cpp Tue Mar 30 07:29:08 2010 +0100 +++ b/common/Matcher.cpp Tue Mar 30 18:12:35 2010 +0100 @@ -239,6 +239,9 @@ GuessList results; GuessSet matches; + // Throw away any initial numbers (likely to be track index) + text = text.replace(QRegExp("^[0-9]+"), ""); + QString quoted; QRegExp quoteRe("\\b[\"']([^\"]+)[\"']\\b"); int qthresh = 0; @@ -311,23 +314,6 @@ } if (score > highScore) highScore = score; -/* - if (score == 0.f) { - int ml = std::min(name.length(), text.length()); - int thresh = ml / 4; - int dist = ed.calculate(text.left(ml), name.left(ml), thresh); - if (dist < thresh) { - std::cerr << "title match: " << name.toStdString() << std::endl; - score += 0.8f - 0.05f * dist; - } - } - - if (score > 0.f) { - - // need to avoid high scores for things with differing - // numbers, e.g. "Chamber Symphony No. 2" should not score - // highly as a match for "Chamber Symphony No. 1" - */ } if (highScore > 0.f) {
--- a/common/Objects.cpp Tue Mar 30 07:29:08 2010 +0100 +++ b/common/Objects.cpp Tue Mar 30 18:12:35 2010 +0100 @@ -19,6 +19,13 @@ QMap<QString, Form *> Form::m_map; QMutex Form::m_mutex; +QString +Composition::getComposerName() const +{ + if (m_composer) return m_composer->name(); + return m_cname; +} + bool Composer::matchDates(const Composer *b) const { @@ -655,6 +662,14 @@ emit aliasesChanged(m_aliases); } +QString +Work::getComposerName() const +{ + Composer *c = getComposer(); + if (c) return c->name(); + else return ""; +} + static int compare(QString a, QString b) { @@ -717,12 +732,22 @@ // with N, because we don't want to treat e.g. "Symphony No. 8" // as catalogue number 8. What a fine hack. - QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b"); -// QRegExp catre("\\b([Oo]pu?s?|[A-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)"); + QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)\\.?[\\s_]*(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b"); int ix = 0; while ((ix = catre.indexIn(text, ix+1)) >= 0) { std::cerr << "extractCatalogueNumberTexts: found match \"" << catre.cap(0).toStdString() << "\"" << std::endl; - results.push_back(catre.cap(0)); + QString cat = catre.cap(0); + // ensure space before digit + for (int i = 0; i+1 < cat.length(); ++i) { + if (!cat[i].isDigit() && !cat[i].isSpace() && cat[i+1].isDigit()) { + QString spaced = cat.left(i+1) + " " + cat.right(cat.length()-i-1); + std::cerr << "spaced out from " << cat.toStdString() << " to " + << spaced.toStdString() << std::endl; + cat = spaced; + break; + } + } + results.push_back(cat); } return results; } @@ -772,6 +797,30 @@ return rv; } +QString +Work::getDisplayName() const +{ + QString suffix; + + if (catalogue() != "") { + suffix = catalogue(); + } else if (opus() != "") { + suffix = QString("Op. %1").arg(opus()); + } + if (suffix != "" && number() != "") { + suffix = QString("%1 no. %2").arg(suffix).arg(number()); + } + if (suffix != "") { + if (name() != "") { + return QString("%1, %2").arg(name()).arg(suffix); + } else { + return suffix; + } + } else { + return name(); + } +} + TrackFile::TrackFile(FileSource source, QObject *parent) : QObject(parent) {
--- a/common/Objects.h Tue Mar 30 07:29:08 2010 +0100 +++ b/common/Objects.h Tue Mar 30 18:12:35 2010 +0100 @@ -109,7 +109,7 @@ Q_PROPERTY(ClassicalData::Composer *composer READ composer WRITE setComposer NOTIFY composerChanged STORED true) Q_PROPERTY(QSet<ClassicalData::Work *> works READ works WRITE setWorks NOTIFY worksChanged STORED true) - Q_PROPERTY(QString composerName READ composerName WRITE setComposerName STORED false) + Q_PROPERTY(QString composerName READ getComposerName WRITE setComposerName STORED false) public: Composition() : HistoricalEvent(), m_composer(0) { } @@ -123,8 +123,10 @@ void setWorks(QSet<Work *> c) { m_works = c; emit worksChanged(c); } void addWork(Work *w) { m_works.insert(w); emit worksChanged(m_works); } - // Not a storable property, set temporarily while composer record is found - QString composerName() const { return m_cname; } + // Not a storable property; set temporarily while composer record + // is found, or else looked up from composer after composer has + // been found + QString getComposerName() const; void setComposerName(QString n) { m_cname = n; } signals: @@ -281,7 +283,7 @@ const Composition *composition() const { return m_composition; } void setComposition(Composition *c) { m_composition = c; emit compositionChanged(c); } - Composer *composer() { + Composer *getComposer() const { if (m_composition) { return m_composition->composer(); } else { @@ -289,6 +291,8 @@ } } + QString getComposerName() const; + struct Ordering { bool operator()(Work *, Work *); }; @@ -310,6 +314,9 @@ */ static QStringList extractCatalogueNumberTexts(QString data); + /// Return name composed from title and catalogue, e.g. Symphony no. 4, Op. 42 + QString getDisplayName() const; + signals: void keyChanged(QString); void opusChanged(QString);
--- a/utilities/track/track.cpp Tue Mar 30 07:29:08 2010 +0100 +++ b/utilities/track/track.cpp Tue Mar 30 18:12:35 2010 +0100 @@ -79,7 +79,7 @@ static QList<Composer *> allComposers; static QHash<QString, Composer *> composerAliases; static QHash<Uri, Composer *> composerUris; -static QMap<Composer *, QSet<Work *> > worksMap; +static QMap<Composer *, QList<Work *> > worksMap; static QList<Work *> allWorks; void @@ -358,10 +358,11 @@ guessFromFilename(QString filename, float scale, GuessSet &guesses) { cerr << "guessFromFilename: " << filename << endl; - QString dirpart = QFileInfo(filename).path().replace(QRegExp("\\d+"), ""); + QString dirpart = QFileInfo(filename).path(); QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts); - dirbits = dirbits.last().split(QRegExp("[^\\w]"), - QString::SkipEmptyParts); + dirbits = dirbits.last() + .replace(QRegExp("^\\d+"), "") + .split(QRegExp("[^\\w]"), QString::SkipEmptyParts); if (!dirbits.empty()) { guessFromMaker(dirbits.first(), scale, guesses); } @@ -374,47 +375,80 @@ } } -void guessWorkFromTitle(QString title, float scale, GuessSet &guesses) +void +guessWorkFromTitleByCatalogue(QString title, float scale, + Composer *composer, GuessSet &guesses) { if (title == "") return; - { - GuessSet myGuesses; - WorkCatalogueMatcher matcher(allWorks); - GuessList gl(matcher.match(title, 0)); - if (!gl.empty()) { - foreach (Guess guess, gl) { - myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); - } + WorkCatalogueMatcher matcher(composer ? worksMap.value(composer) : allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + guesses.insert(Guess(guess.confidence() * scale, guess.entity())); } - integrateGuesses(guesses, myGuesses); } - { - GuessSet myGuesses; - WorkTitleMatcher matcher(allWorks); - GuessList gl(matcher.match(title, 0)); - if (!gl.empty()) { - foreach (Guess guess, gl) { - myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); - } +} + +void +guessWorkFromTitle(QString title, float scale, + Composer *composer, GuessSet &guesses) +{ + if (title == "") return; + WorkTitleMatcher matcher(composer ? worksMap.value(composer) : allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + guesses.insert(Guess(guess.confidence() * scale, guess.entity())); } - integrateGuesses(guesses, myGuesses); } } void -guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses) +guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, + Composer *composer, GuessSet &guesses) { if (tags.find(tag) != tags.end()) { - guessWorkFromTitle(tags[tag], scale, guesses); + GuessSet myGuesses; + guessWorkFromTitle(tags[tag], scale, composer, myGuesses); + integrateGuesses(guesses, myGuesses); + myGuesses.clear(); + guessWorkFromTitle(tags[tag], scale, composer, myGuesses); + integrateGuesses(guesses, myGuesses); } } void -guessWorkFromFilename(QString filename, float scale, GuessSet &guesses) +guessWorkFromFilenameByCatalogue(QString filename, float scale, + Composer *composer, GuessSet &guesses) { cerr << "guessWorkFromFilename: " << filename << endl; - QString filepart = QFileInfo(filename).fileName().replace(QRegExp("\\.[^\\.]*"), "").replace(QRegExp("^\\d+[^\\w]+"), ""); - guessWorkFromTitle(filepart, scale, guesses); + + QString dirpart = QFileInfo(filename).path(); + QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts); + if (!dirbits.empty()) { + guessWorkFromTitleByCatalogue(dirbits.last(), scale * 0.7, composer, guesses); + } + + QString filepart = QFileInfo(filename).fileName().replace + (QRegExp("\\.[^\\.]*"), "").replace(QRegExp("^\\d+[^\\w]+"), ""); + guessWorkFromTitleByCatalogue(filepart, scale, composer, guesses); +} + +void +guessWorkFromFilenameByTitle(QString filename, float scale, + Composer *composer, GuessSet &guesses) +{ + cerr << "guessWorkFromFilename: " << filename << endl; + + QString dirpart = QFileInfo(filename).path(); + QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts); + if (!dirbits.empty()) { + guessWorkFromTitle(dirbits.last(), scale * 0.7, composer, guesses); + } + + QString filepart = QFileInfo(filename).fileName().replace + (QRegExp("\\.[^\\.]*"), "").replace(QRegExp("^\\d+[^\\w]+"), ""); + guessWorkFromTitle(filepart, scale, composer, guesses); } void @@ -487,14 +521,33 @@ bc2 = bg.confidence(); } + // If we have only one confident composer guess, consider only + // works from that composer (really this should permit considering + // works from all confident composers) + Composer *confidentComposer = 0; + if (bc2 > 0.5) { + confidentComposer = qobject_cast<Composer *>(guesses.begin()->entity()); + } + GuessSet workGuesses; - guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses); - guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses); - guessWorkFromTitleTag(tags, "TALB", 0.4, workGuesses); - guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses); - guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses); - guessWorkFromTitleTag(tags, "ALBUM", 0.4, workGuesses); - guessWorkFromFilename(track, 0.4, workGuesses); + if (tags["TITLE"] != "") { + guessWorkFromTitleTag(tags, "TITLE", 0.5, confidentComposer, workGuesses); + } + if (tags["TIT2"] != "") { + guessWorkFromTitleTag(tags, "TIT2", 0.5, confidentComposer, workGuesses); + } + if (workGuesses.empty()) { + guessWorkFromTitleTag(tags, "TIT1", 0.2, confidentComposer, workGuesses); + guessWorkFromTitleTag(tags, "TALB", 0.2, confidentComposer, workGuesses); + guessWorkFromTitleTag(tags, "TIT3", 0.1, confidentComposer, workGuesses); + guessWorkFromTitleTag(tags, "ALBUM", 0.4, confidentComposer, workGuesses); + } + if (workGuesses.empty() || workGuesses.begin()->confidence() < 0.3) { + guessWorkFromFilenameByCatalogue(track, 0.4, confidentComposer, workGuesses); + } + if (workGuesses.empty()) { + guessWorkFromFilenameByTitle(track, 0.3, confidentComposer, workGuesses); + } cerr << "Work guesses:" << endl; foreach (Guess g, workGuesses) { @@ -505,8 +558,8 @@ GuessSet consistentWorks; foreach (Guess wg, workGuesses) { Work *w = qobject_cast<Work *>(wg.entity()); - if (!w || !w->composer()) continue; - Composer *wc = w->composer(); + if (!w || !w->getComposer()) continue; + Composer *wc = w->getComposer(); foreach (Guess g, guesses) { if (g.entity() == wc) { consistentComposers.insert(g); @@ -528,13 +581,14 @@ float bc3 = bc2; QString best3 = best2; QString work; - if (!consistentComposers.empty()) { - Guess bg = *consistentComposers.begin(); - best3 = bg.entity()->name(); - bc3 = bg.confidence(); - //!!! not ok, no guarantee that this will be the same work as - //!!! corresponds to the first composer in consistentComposers - work = consistentWorks.begin()->entity()->name(); + if (!consistentWorks.empty()) { + Guess bg = *consistentWorks.begin(); + Work *w = qobject_cast<Work *>(bg.entity()); + if (w) { + bc3 = bg.confidence(); + best3 = w->getComposerName(); + work = w->getDisplayName(); + } } cout << track << "|" << best << "|" << bc << "|" << best2 << "|" << bc2 << "|" << best3 << "|" << bc3 << "|" << work << endl; @@ -621,7 +675,7 @@ Composition *c = w->composition(); if (c) { Composer *cp = c->composer(); - if (cp) worksMap[cp].insert(w); + if (cp) worksMap[cp].push_back(w); } }