Mercurial > hg > classical
changeset 34:271cbaf6e8d9
* First bits of works matching
author | Chris Cannam |
---|---|
date | Fri, 26 Mar 2010 13:53:31 +0000 |
parents | 84d6acb6b3ba |
children | 732fb6b754fb |
files | common/Matcher.cpp common/Matcher.h common/Objects.cpp common/Objects.h utilities/track/track.cpp |
diffstat | 5 files changed, 555 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/common/Matcher.cpp Mon Mar 22 16:41:01 2010 +0000 +++ b/common/Matcher.cpp Fri Mar 26 13:53:31 2010 +0000 @@ -2,9 +2,12 @@ #include "Matcher.h" #include "Objects.h" +#include "EditDistance.h" #include <QMultiMap> +#include <iostream> + using namespace Dataquay; namespace ClassicalData { @@ -19,19 +22,19 @@ float threshold) const { GuessList results; + GuessSet matches; - QMap<Guess, int> matches; foreach (Composer *c, m_composers) { float value = c->matchTypingQuick(text); if (value < threshold) continue; - matches.insert(Guess(value, c), 1); + matches.insert(Guess(value, c)); } int n = 0; - for (QMap<Guess, int>::const_iterator i = matches.begin(); + for (GuessSet::const_iterator i = matches.begin(); i != matches.end(); ++i) { - results.push_back(i.key()); - if (++n > maxResults) break; + results.push_back(*i); + if (maxResults > 0 && ++n > maxResults) break; } return results; @@ -48,18 +51,18 @@ { GuessList results; - QMap<Guess, int> matches; + GuessSet matches; foreach (Composer *c, m_composers) { float value = c->matchTyping(text); if (value < threshold) continue; - matches.insert(Guess(value, c), 1); + matches.insert(Guess(value, c)); } int n = 0; - for (QMap<Guess, int>::const_iterator i = matches.begin(); + for (GuessSet::const_iterator i = matches.begin(); i != matches.end(); ++i) { - results.push_back(i.key()); - if (++n > maxResults) break; + results.push_back(*i); + if (maxResults > 0 && ++n > maxResults) break; } return results; @@ -76,18 +79,200 @@ { GuessList results; - QMap<Guess, int> matches; + GuessSet matches; foreach (Composer *c, m_composers) { float value = c->matchFuzzyName(text); if (value < threshold) continue; - matches.insert(Guess(value, c), 1); +// std::cerr << "Liking: " << c->name().toStdString() << " (" << value << ")" << std::endl; + matches.insert(Guess(value, c)); } int n = 0; - for (QMap<Guess, int>::const_iterator i = matches.begin(); + for (GuessSet::iterator i = matches.begin(); i != matches.end(); ++i) { - results.push_back(i.key()); - if (++n > maxResults) break; + Guess g = *i; + results.push_back(g); +// std::cerr << "Pushing: " << g.entity()->name().toStdString() << std::endl; + if (maxResults > 0 && ++n > maxResults) break; + } + + return results; +} + +WorkCatalogueMatcher::WorkCatalogueMatcher(QList<Work *> wl) : + m_works(wl) +{ +} + +GuessList +WorkCatalogueMatcher::match(QString text, int maxResults, + float threshold) const +{ + GuessList results; + GuessSet matches; + QStringList cats = Work::extractCatalogueNumberTexts(text); + if (cats.empty()) return results; + foreach (QString cat, cats) { + std::cerr << "testing cat \"" << cat.toStdString() << "\" against " + << m_works.size() << " works" << std::endl; + foreach (Work *w, m_works) { + if (maxResults > 0 && matches.size() >= maxResults) { + break; + } + QString catalogue = w->catalogue(); + if (catalogue != "") { + if (!Work::compareCatalogueNumberTexts(catalogue, cat)) { + std::cerr << "We like: " << w->name().toStdString() << " (" + << catalogue.toStdString() << ")" << std::endl; + // all catalogue matches score equal here + matches.insert(Guess(1.f, w)); + continue; + } + } + QString opus = w->opus(); + QString number = w->number(); + QString optext; + if (opus != "") { + if (number != "") { + optext = QString("Op %1 no %2").arg(opus).arg(number); + if (!Work::compareCatalogueNumberTexts(optext, cat)) { + std::cerr << "We like: " << w->name().toStdString() << " (" + << optext.toStdString() << ")" << std::endl; + matches.insert(Guess(1.f, w)); + continue; + } + } else { + optext = QString("Op %1").arg(opus); + if (!Work::compareCatalogueNumberTexts(optext, cat)) { + std::cerr << "We like: " << w->name().toStdString() << " (" + << optext.toStdString() << ")" << std::endl; + matches.insert(Guess(1.f, w)); + continue; + } + } + } + } + } + + if (maxResults == 0 || matches.size() < maxResults) { + + // Now, for slightly lower marks, test for strings like "Symphony + // no 8" at the start of the title + + QRegExp numberRe("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)"); + + if (numberRe.indexIn(text) >= 0) { + + QString tag, number; + tag = numberRe.cap(1); + tag.replace(QRegExp("[^\\w\\s]+"), ""); + number = numberRe.cap(2); + + QString matcherReStr = + QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(tag).arg(number); + QRegExp matcherRe(matcherReStr, Qt::CaseInsensitive); + std::cerr << "matcherRe: " << matcherReStr.toStdString() << std::endl; + + // initials only +/* nah, doesn't work well + QString weakTag; + QRegExp initialRe("\\b(\\w)\\w*\\b"); + int ix = 0; + while ((ix = initialRe.indexIn(tag, ix)) >= 0) { + if (ix > 0) weakTag += "\\s+"; + weakTag += initialRe.cap(1) + "\\w*"; + ++ix; + } + + QString weakMatcherReStr = + QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(weakTag).arg(number); + QRegExp weakMatcherRe(weakMatcherReStr, Qt::CaseInsensitive); + std::cerr << "weakMatcherRe: " << weakMatcherReStr.toStdString() << std::endl; +*/ + foreach (Work *w, m_works) { + if (maxResults > 0 && matches.size() >= maxResults) { + break; + } + QString name = w->name(); + if (matcherRe.indexIn(name) >= 0) { + std::cerr << "We quite like: " << name.toStdString() << std::endl; + matches.insert(Guess(0.8f, w)); + } +/* else if (weakMatcherRe.indexIn(name) >= 0) { + std::cerr << "We sorta like: " << name.toStdString() << std::endl; + matches.insert(Guess(0.2f, w)); + } +*/ + } + } + } + + int n = 0; + for (GuessSet::const_iterator i = matches.begin(); + i != matches.end(); ++i) { + results.push_back(*i); + if (maxResults > 0 && ++n > maxResults) break; + } + + return results; +} + +WorkTitleMatcher::WorkTitleMatcher(QList<Work *> wl) : + m_works(wl) +{ +} + +GuessList +WorkTitleMatcher::match(QString text, int maxResults, + float threshold) const +{ + GuessList results; + GuessSet matches; + + QString quoted; + QRegExp quoteRe("\\b[\"']([^\"]+)[\"']\\b"); + int qthresh = 0; + + if (quoteRe.indexIn(text) >= 0) { + quoted = quoteRe.cap(1); + if (quoted.length() < 4) quoted = ""; + qthresh = quoted.length() / 4; + } + + std::cerr << "text = " << text.toStdString() << ", quoted = " + << quoted.toStdString() << std::endl; + + EditDistance ed; + + foreach (Work *w, m_works) { + if (maxResults > 0 && matches.size() >= maxResults) { + break; + } + + float score = 0.f; + QString name = w->name(); + + if (quoted != "") { + if (quoteRe.indexIn(name) >= 0) { + QString q = quoteRe.cap(1); + int dist = ed.calculate(quoted, q, qthresh); + if (dist < qthresh) { + std::cerr << "quoted name match: " << q.toStdString() << std::endl; + score += 0.7f; + } + } + } + + if (score > 0.f) { + matches.insert(Guess(score, w)); + } + } + + int n = 0; + for (GuessSet::const_iterator i = matches.begin(); + i != matches.end(); ++i) { + results.push_back(*i); + if (maxResults > 0 && ++n > maxResults) break; } return results;
--- a/common/Matcher.h Mon Mar 22 16:41:01 2010 +0000 +++ b/common/Matcher.h Fri Mar 26 13:53:31 2010 +0000 @@ -3,15 +3,15 @@ #ifndef _CLASSICAL_DATA_MATCHER_H_ #define _CLASSICAL_DATA_MATCHER_H_ +#include "Objects.h" + #include <dataquay/Uri.h> #include <QHash> +#include <set> namespace ClassicalData { -class Composer; -class NamedEntity; - class Guess { public: @@ -20,11 +20,14 @@ float confidence() const { return m_confidence; } void setConfidence(float c) { m_confidence = c; } - NamedEntity *entity() { return m_entity; } + NamedEntity *entity() const { return m_entity; } void setEntity(NamedEntity *e) { m_entity = e; } bool operator<(const Guess &g) const { - return (confidence() > g.confidence()); + if (confidence() == g.confidence()) { + return entity()->uri() < g.entity()->uri(); + } + return (confidence() > g.confidence()); // n.b. most confident first } private: @@ -33,6 +36,7 @@ }; typedef QList<Guess> GuessList; +typedef std::set<Guess> GuessSet; class Matcher { @@ -76,6 +80,28 @@ QList<Composer *> m_composers; }; +class WorkCatalogueMatcher : public Matcher +{ +public: + WorkCatalogueMatcher(QList<Work *> wl); + virtual GuessList match(QString text, int maxResults, + float threshold = 0.f) const; + +private: + QList<Work *> m_works; +}; + +class WorkTitleMatcher : public Matcher +{ +public: + WorkTitleMatcher(QList<Work *> wl); + virtual GuessList match(QString text, int maxResults, + float threshold = 0.f) const; + +private: + QList<Work *> m_works; +}; + } Q_DECLARE_METATYPE(ClassicalData::Guess*);
--- a/common/Objects.cpp Mon Mar 22 16:41:01 2010 +0000 +++ b/common/Objects.cpp Fri Mar 26 13:53:31 2010 +0000 @@ -665,45 +665,60 @@ Work::compareCatalogueNumberTexts(QString a, QString b) { // std::cout << "compare " << a.toStdString() -// << " " << b.toStdString() << std::endl; +// << " :: " << b.toStdString() << std::endl; if (a == b) return 0; if (!a[0].isDigit()) { - if (!b[0].isDigit()) { - QStringList al = a.split(QRegExp("[ :-]")); - QStringList bl = b.split(QRegExp("[ :-]")); - if (al.size() < 2 || bl.size() < 2 || - al.size() != bl.size()) { - if (a < b) return -1; - else if (a > b) return 1; - else return 0; - } - for (int i = 0; i < al.size(); ++i) { - if (al[i] != bl[i]) { -// std::cout << "subcompare " << al[i].toStdString() -// << " " << bl[i].toStdString() << std::endl; - return compareCatalogueNumberTexts(al[i], bl[i]); - } - } - } else { - return compare(a, b); - } - } else { - if (!b[0].isDigit()) { - return compare(a, b); + a.replace(QRegExp("^[^\\d]+"), ""); + } + + if (!b[0].isDigit()) { + b.replace(QRegExp("^[^\\d]+"), ""); + } + + QStringList al = a.split(QRegExp("\\b[^\\d]*"), QString::SkipEmptyParts); + QStringList bl = b.split(QRegExp("\\b[^\\d]*"), QString::SkipEmptyParts); + if (al.size() != bl.size()) return int(al.size()) - int(bl.size()); + +/* if (al.size() < 2 || bl.size() < 2 || al.size() != bl.size()) { + if (a < b) return -1; + else if (a > b) return 1; + else return 0; + } +*/ + for (int i = 0; i < al.size(); ++i) { + if (al[i] != bl[i]) { + // use atoi instead of toInt() because we want it to succeed even + // if the text is not only an integer (e.g. 35a) + int aoi = atoi(al[i].toLocal8Bit().data()); + int boi = atoi(bl[i].toLocal8Bit().data()); + if (aoi != boi) return aoi - boi; + else return compare(al[i], bl[i]); } } - - // use atoi instead of toInt() because we want it to succeed even - // if the text is not only an integer (e.g. 35a) - int aoi = atoi(a.toLocal8Bit().data()); - int boi = atoi(b.toLocal8Bit().data()); + return 0; +} -// std::cout << "aoi = " << aoi << ", boi = " << boi << std::endl; +QStringList +Work::extractCatalogueNumberTexts(QString text) +{ + //!!! test this + QStringList results; + std::cerr << "Work::extractCatalogueNumberTexts(" << text.toStdString() << ")" << std::endl; - if (aoi == boi) return compare(a, b); - else return aoi - boi; + // Note we explicitly exclude "catalogue identifiers" beginning + // with N, because we don't want to treat e.g. "Symphony No. 8" + // as catalogue number 8. What a fine hack. + + QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b"); +// QRegExp catre("\\b([Oo]pu?s?|[A-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)"); + int ix = 0; + while ((ix = catre.indexIn(text, ix+1)) >= 0) { + std::cerr << "extractCatalogueNumberTexts: found match \"" << catre.cap(0).toStdString() << "\"" << std::endl; + results.push_back(catre.cap(0)); + } + return results; } bool
--- a/common/Objects.h Mon Mar 22 16:41:01 2010 +0000 +++ b/common/Objects.h Fri Mar 26 13:53:31 2010 +0000 @@ -249,15 +249,19 @@ QString key() const { return m_key; } void setKey(QString n) { m_key = n; emit keyChanged(n); } + /// Should omit Op prefix, e.g. in opus 102 no 2, this would be "102" QString opus() const { return m_opus; } void setOpus(QString n) { m_opus = n; emit opusChanged(n); } + /// For part of a catalogue entry, e.g. in opus 102 no 2, this + /// would be "2". Should normally be used only with partOf + QString number() const { return m_number; } + void setNumber(QString n) { m_number = n; emit numberChanged(n); } + + /// Including catalogue prefix, e.g. "BWV 1066"; only for non-opus numbers QString catalogue() const { return m_catalogue; } void setCatalogue(QString n) { m_catalogue = n; emit catalogueChanged(n); } - QString number() const { return m_number; } - void setNumber(QString n) { m_number = n; emit numberChanged(n); } - QSet<Form *> forms() const { return m_forms; } void setForms(QSet<Form *> f) { m_forms = f; emit formsChanged(f); } void addForm(Form *f) { m_forms.insert(f); emit formsChanged(m_forms); } @@ -277,6 +281,14 @@ const Composition *composition() const { return m_composition; } void setComposition(Composition *c) { m_composition = c; emit compositionChanged(c); } + Composer *composer() { + if (m_composition) { + return m_composition->composer(); + } else { + return 0; + } + } + struct Ordering { bool operator()(Work *, Work *); }; @@ -290,6 +302,14 @@ //!!! todo: unit tests static int compareCatalogueNumberTexts(QString a, QString b); + /** + * Where data (possibly a title string, including opus number or + * equivalent) appears to contain some text of a suitable form for + * use with compareCatalogueNumberTexts, extract and return each + * example. + */ + static QStringList extractCatalogueNumberTexts(QString data); + signals: void keyChanged(QString); void opusChanged(QString);
--- a/utilities/track/track.cpp Mon Mar 22 16:41:01 2010 +0000 +++ b/utilities/track/track.cpp Fri Mar 26 13:53:31 2010 +0000 @@ -80,6 +80,7 @@ static QHash<QString, Composer *> composerAliases; static QHash<Uri, Composer *> composerUris; static QMap<Composer *, QSet<Work *> > worksMap; +static QList<Work *> allWorks; void show(Composer *c) @@ -204,7 +205,8 @@ getTrackData(FileSource source, QString &fingerprint, QString &puid, QString &title, QString &maker, AudioFileReader::TagMap &tags) { - AudioFileReader *reader = AudioFileReaderFactory::createReader(source); +// AudioFileReader *reader = AudioFileReaderFactory::createReader(source); + AudioFileReader *reader = AudioFileReaderFactory::createThreadingReader(source); if (!reader || !reader->isOK()) { cerr << "Failed to open audio file" << endl; return; @@ -212,8 +214,8 @@ title = reader->getTitle(); maker = reader->getMaker(); - cout << "File tag title: " << reader->getTitle() << endl; - cout << "File tag maker: " << reader->getMaker() << endl; +// cout << "File tag title: " << reader->getTitle() << endl; +// cout << "File tag maker: " << reader->getMaker() << endl; cout << "All tags:" << endl; tags = reader->getTags(); @@ -261,42 +263,175 @@ if (puid == "" && !features[1].empty()) { puid = QString::fromStdString(features[1][0].label); } - std::cerr << "fingerprint = " << fingerprint << std::endl; - std::cerr << "puid = " << puid << std::endl; + delete[] input; + delete plugin; + delete reader; +} + +float +bonusFactor(NamedEntity *e) +{ + // tiny nudge to prefer composers we actually have works for + Composer *c = qobject_cast<Composer *>(e); + float f = 1.f; + int sz = 0; + if (c && worksMap.contains(c)) { + sz = worksMap[c].size(); + while (sz > 0) { + f += 0.01; + sz = sz / 10; + } + } + return f; +} + +void +integrateGuesses(GuessSet &guesses, GuessSet newGuesses) +{ + QHash<NamedEntity *, float> ecmap; + foreach (Guess g, guesses) { + ecmap[g.entity()] += g.confidence() * bonusFactor(g.entity()); + } + foreach (Guess g, newGuesses) { + if (ecmap.contains(g.entity())) { + ecmap[g.entity()] += g.confidence() / 2; + } else { + ecmap[g.entity()] = g.confidence(); + } + } + guesses.clear(); + foreach (NamedEntity *e, ecmap.keys()) { + guesses.insert(Guess(ecmap[e], e)); + } +} + +void +guessFromMaker(QString maker, float scale, GuessSet &guesses) +{ + if (maker == "") return; +// cerr << "guessFromMaker: " << maker << endl; + GuessSet myGuesses; + if (composerAliases.contains(maker)) { + QList<Composer *> matching = composerAliases.values(maker); + foreach (Composer *c, matching) { + myGuesses.insert(Guess(10 * scale, c)); + } + } else { + ComposerFullTextMatcher matcher(allComposers); + GuessList gl(matcher.match(maker, 5, 0.5)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + } + } + } + integrateGuesses(guesses, myGuesses); +} + +void +guessFromMakerTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses) +{ + if (tags.find(tag) != tags.end()) { + guessFromMaker(tags[tag], scale, guesses); + } +} + +void +guessFromTitle(QString title, float scale, GuessSet &guesses) +{ + QStringList bits = title.split(QRegExp("[:,_-]"), + QString::SkipEmptyParts); + if (bits.size() > 1) { + guessFromMaker(bits.first(), scale, guesses); + } +} + +void +guessFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses) +{ + if (tags.find(tag) != tags.end()) { + guessFromTitle(tags[tag], scale, guesses); + } +} + +void +guessFromFilename(QString filename, float scale, GuessSet &guesses) +{ + cerr << "guessFromFilename: " << filename << endl; + QString dirpart = QFileInfo(filename).path().replace(QRegExp("\\d+"), ""); + QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts); + dirbits = dirbits.last().split(QRegExp("[^\\w]"), + QString::SkipEmptyParts); + if (!dirbits.empty()) { + guessFromMaker(dirbits.first(), scale, guesses); + } + + QString filepart = QFileInfo(filename).fileName().replace(QRegExp("\\d+"), ""); + QStringList filebits = filepart.split(QRegExp("[^\\w]"), + QString::SkipEmptyParts); + if (!filebits.empty()) { + guessFromMaker(filebits.first(), scale, guesses); + } +} + +void guessWorkFromTitle(QString title, float scale, GuessSet &guesses) +{ + if (title == "") return; + GuessSet myGuesses; + WorkCatalogueMatcher matcher(allWorks); + GuessList gl(matcher.match(title, 0)); + if (!gl.empty()) { + foreach (Guess guess, gl) { + myGuesses.insert(Guess(guess.confidence() * scale, guess.entity())); + } + } + integrateGuesses(guesses, myGuesses); +} + +void +guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses) +{ + if (tags.find(tag) != tags.end()) { + guessWorkFromTitle(tags[tag], scale, guesses); + } } void guess(QString track) { + cout << endl; cout << "Guessing composer for: " << track << endl; - cerr << "Creating TrackFile object..."; +// cerr << "Creating TrackFile object..."; FileSource fs(track); TrackFile *tf = new TrackFile(fs); - cerr << "done" << endl; - cerr << "hash = " << tf->hash() << endl; +// cerr << "done" << endl; +// cerr << "hash = " << tf->hash() << endl; QString fingerprint, puid, maker, title; AudioFileReader::TagMap tags; //!!! bad api!: getTrackData(fs, fingerprint, puid, title, maker, tags); - if (maker != "") { - cout << "Name found: " << maker << endl; - if (composerAliases.contains(maker)) { - QList<Composer *> matching = composerAliases.values(maker); - foreach (Composer *c, matching) { - cout << "Matched URI: " << c->uri() << endl; - } - } else { - ComposerFullTextMatcher matcher(allComposers); - GuessList gl(matcher.match(maker, 5, 0.5)); - if (!gl.empty()) { - foreach (Guess guess, gl) { - cout << "Possibly matched URI (score = " << guess.confidence() << "): " << guess.entity()->uri() << endl; - } - } - } + GuessSet guesses; + + guessFromMakerTag(tags, "TCOM", 1.0, guesses); + guessFromMakerTag(tags, "COMPOSER", 1.0, guesses); + + if (guesses.empty() || guesses.begin()->confidence() < 0.4) { + guessFromMakerTag(tags, "TOPE", 0.8, guesses); + guessFromMakerTag(tags, "TPE1", 0.8, guesses); + + guessFromMakerTag(tags, "ARTIST", 0.9, guesses); + guessFromMakerTag(tags, "PERFORMER", 0.8, guesses); + + guessFromTitleTag(tags, "TIT1", 0.4, guesses); + guessFromTitleTag(tags, "TIT2", 0.5, guesses); + guessFromTitleTag(tags, "TALB", 0.5, guesses); + guessFromTitleTag(tags, "TIT3", 0.3, guesses); + + guessFromTitleTag(tags, "TITLE", 0.5, guesses); + guessFromTitleTag(tags, "ALBUM", 0.5, guesses); } if (tags.find("MUSICBRAINZ_ARTISTID") != tags.end()) { @@ -304,11 +439,83 @@ Uri mbzUri = Uri("http://dbtune.org/musicbrainz/resource/artist/" + id); cout << "MBZ id found: " << id << endl; if (composerUris.contains(mbzUri)) { - cout << "Matched URI: " << composerUris[mbzUri]->uri() << endl; + guesses.insert(Guess(2.0, composerUris[mbzUri])); } } + cerr << "Composer guesses:" << endl; + foreach (Guess g, guesses) { + cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl; + } + + float bc = 0.f; + QString best; + if (!guesses.empty()) { + Guess bg = *guesses.begin(); + best = bg.entity()->name(); + bc = bg.confidence(); + } + + guessFromFilename(track, 0.5, guesses); + + float bc2 = 0.f; + QString best2; + if (!guesses.empty()) { + Guess bg = *guesses.begin(); + best2 = bg.entity()->name(); + bc2 = bg.confidence(); + } + + GuessSet workGuesses; + guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses); + guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "TALB", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses); + guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses); + guessWorkFromTitleTag(tags, "ALBUM", 0.5, workGuesses); + cerr << "Work guesses:" << endl; + foreach (Guess g, workGuesses) { + cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl; + } + + GuessSet consistentComposers; + GuessSet consistentWorks; + foreach (Guess wg, workGuesses) { + Work *w = qobject_cast<Work *>(wg.entity()); + if (!w || !w->composer()) continue; + Composer *wc = w->composer(); + foreach (Guess g, guesses) { + if (g.entity() == wc) { + consistentComposers.insert(g); + consistentWorks.insert(Guess(g.confidence(), wg.entity())); + } + } + } + + cerr << "Consistent composer guesses:" << endl; + foreach (Guess g, consistentComposers) { + cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl; + } + + cerr << "Consistent work guesses:" << endl; + foreach (Guess g, consistentWorks) { + cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl; + } + + float bc3 = bc2; + QString best3 = best2; + QString work; + if (!consistentComposers.empty()) { + Guess bg = *consistentComposers.begin(); + best3 = bg.entity()->name(); + bc3 = bg.confidence(); + //!!! not ok, no guarantee that this will be the same work as + //!!! corresponds to the first composer in consistentComposers + work = consistentWorks.begin()->entity()->name(); + } + + cout << track << "|" << best << "|" << bc << "|" << best2 << "|" << bc2 << "|" << best3 << "|" << bc3 << "|" << work << endl; } int @@ -322,6 +529,28 @@ args.push_back(argv[i]); } + //!!! unit test! + int c = Work::compareCatalogueNumberTexts("Op. 1 no 4", "Op. 3 no 2"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("1 no 4", "3 no 2"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("4 no 2", "3 no 2"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Opus 4 no 2", "3 no 2"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Op 141", "K. 21"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Op 14", "K. 21"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Op 6a", "Op 6"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Op 6a", "Op 6b"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Op 6a", "Op 7"); + std::cerr << c << std::endl; + c = Work::compareCatalogueNumberTexts("Hob XXIIId:Es1", "Hob XXII:B04"); + std::cerr << c << std::endl; + BasicStore *store = new BasicStore(); store->setBaseUri(Uri("http://dbtune.org/classical/resource/")); ObjectLoader *loader = new ObjectLoader(store); @@ -366,6 +595,7 @@ } foreach (Work *w, works) { + allWorks.push_back(w); Composition *c = w->composition(); if (c) { Composer *cp = c->composer();