changeset 34:271cbaf6e8d9

* First bits of works matching
author Chris Cannam
date Fri, 26 Mar 2010 13:53:31 +0000
parents 84d6acb6b3ba
children 732fb6b754fb
files common/Matcher.cpp common/Matcher.h common/Objects.cpp common/Objects.h utilities/track/track.cpp
diffstat 5 files changed, 555 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/common/Matcher.cpp	Mon Mar 22 16:41:01 2010 +0000
+++ b/common/Matcher.cpp	Fri Mar 26 13:53:31 2010 +0000
@@ -2,9 +2,12 @@
 
 #include "Matcher.h"
 #include "Objects.h"
+#include "EditDistance.h"
 
 #include <QMultiMap>
 
+#include <iostream>
+
 using namespace Dataquay;
 
 namespace ClassicalData {
@@ -19,19 +22,19 @@
                                   float threshold) const
 {
     GuessList results;
+    GuessSet matches;
 
-    QMap<Guess, int> matches;
     foreach (Composer *c, m_composers) {
         float value = c->matchTypingQuick(text);
         if (value < threshold) continue;
-        matches.insert(Guess(value, c), 1);
+        matches.insert(Guess(value, c));
     }
     
     int n = 0;
-    for (QMap<Guess, int>::const_iterator i = matches.begin();
+    for (GuessSet::const_iterator i = matches.begin();
          i != matches.end(); ++i) {
-        results.push_back(i.key());
-        if (++n > maxResults) break;
+        results.push_back(*i);
+        if (maxResults > 0 && ++n > maxResults) break;
     }
 
     return results;
@@ -48,18 +51,18 @@
 {
     GuessList results;
 
-    QMap<Guess, int> matches;
+    GuessSet matches;
     foreach (Composer *c, m_composers) {
         float value = c->matchTyping(text);
         if (value < threshold) continue;
-        matches.insert(Guess(value, c), 1);
+        matches.insert(Guess(value, c));
     }
     
     int n = 0;
-    for (QMap<Guess, int>::const_iterator i = matches.begin();
+    for (GuessSet::const_iterator i = matches.begin();
          i != matches.end(); ++i) {
-        results.push_back(i.key());
-        if (++n > maxResults) break;
+        results.push_back(*i);
+        if (maxResults > 0 && ++n > maxResults) break;
     }
 
     return results;
@@ -76,18 +79,200 @@
 {
     GuessList results;
 
-    QMap<Guess, int> matches;
+    GuessSet matches;
     foreach (Composer *c, m_composers) {
         float value = c->matchFuzzyName(text);
         if (value < threshold) continue;
-        matches.insert(Guess(value, c), 1);
+//        std::cerr << "Liking: " << c->name().toStdString() << " (" << value << ")" << std::endl;
+        matches.insert(Guess(value, c));
     }
     
     int n = 0;
-    for (QMap<Guess, int>::const_iterator i = matches.begin();
+    for (GuessSet::iterator i = matches.begin();
          i != matches.end(); ++i) {
-        results.push_back(i.key());
-        if (++n > maxResults) break;
+        Guess g = *i;
+        results.push_back(g);
+//        std::cerr << "Pushing: " << g.entity()->name().toStdString() << std::endl;
+        if (maxResults > 0 && ++n > maxResults) break;
+    }
+
+    return results;
+}
+
+// Construct a matcher over the given works.  The list of Work
+// pointers is stored in m_works for use by match().
+WorkCatalogueMatcher::WorkCatalogueMatcher(QList<Work *> wl)
+    : m_works(wl)
+{
+}
+
+/**
+ * Find works whose catalogue or opus number is mentioned in text.
+ *
+ * Each catalogue-style identifier that
+ * Work::extractCatalogueNumberTexts() finds in text is compared, using
+ * Work::compareCatalogueNumberTexts(), with every work's catalogue()
+ * and with "Op <opus>" or "Op <opus> no <number>" built from its
+ * opus() and number().  A hit scores 1.0.  If there is still room in
+ * the result set, a leading "<words> no <digits>" phrase in text
+ * (e.g. "Symphony no 8") is then matched case-insensitively against
+ * the start of each work's name, for a score of 0.8.
+ *
+ * Nothing is returned when text contains no catalogue-style
+ * identifier at all, even if it starts with a "Symphony no 8"-style
+ * phrase.
+ *
+ * \param text       free text to examine, e.g. a title tag
+ * \param maxResults maximum number of guesses to collect; zero or
+ *                   less means no limit
+ * \param threshold  guesses scoring below this value are not produced
+ * \return the guesses found, most confident first
+ */
+GuessList
+WorkCatalogueMatcher::match(QString text, int maxResults,
+                            float threshold) const
+{
+    // Fixed confidences awarded by the two stages below
+    const float catalogueScore = 1.f;
+    const float titleNumberScore = 0.8f;
+
+    GuessList results;
+    GuessSet matches;
+    QStringList cats = Work::extractCatalogueNumberTexts(text);
+    if (cats.empty()) return results;
+
+    // Stage 1: catalogue and opus numbers.  All such matches score
+    // equal here, so the whole stage is pointless if that score
+    // cannot reach the threshold.
+    if (threshold <= catalogueScore) {
+        foreach (QString cat, cats) {
+            std::cerr << "testing cat \"" << cat.toStdString() << "\" against "
+                      << m_works.size() << " works" << std::endl;
+            foreach (Work *w, m_works) {
+                if (maxResults > 0 && int(matches.size()) >= maxResults) {
+                    break;
+                }
+
+                // Text form of the work's identifier that matched cat
+                QString matched;
+
+                QString catalogue = w->catalogue();
+                if (catalogue != "" &&
+                    !Work::compareCatalogueNumberTexts(catalogue, cat)) {
+                    matched = catalogue;
+                } else {
+                    QString opus = w->opus();
+                    if (opus == "") continue;
+                    QString number = w->number();
+                    QString optext;
+                    if (number != "") {
+                        optext = QString("Op %1 no %2").arg(opus).arg(number);
+                    } else {
+                        optext = QString("Op %1").arg(opus);
+                    }
+                    // compareCatalogueNumberTexts returns 0 for equality
+                    if (Work::compareCatalogueNumberTexts(optext, cat)) continue;
+                    matched = optext;
+                }
+
+                std::cerr << "We like: " << w->name().toStdString() << " ("
+                          << matched.toStdString() << ")" << std::endl;
+                matches.insert(Guess(catalogueScore, w));
+            }
+        }
+    }
+
+    const bool roomForMore =
+        (maxResults <= 0 || int(matches.size()) < maxResults);
+
+    if (roomForMore && threshold <= titleNumberScore) {
+
+        // Stage 2: for slightly lower marks, test for strings like
+        // "Symphony no 8" at the start of the title
+
+        QRegExp numberRe("^([^\\d]+)\\s+[Nn][a-z]*\\.?\\s*(\\d+)");
+
+        if (numberRe.indexIn(text) >= 0) {
+
+            QString tag = numberRe.cap(1);
+            // Strip everything but word characters and whitespace, so
+            // that the tag can be embedded in a regexp verbatim
+            tag.replace(QRegExp("[^\\w\\s]+"), "");
+            QString number = numberRe.cap(2);
+
+            QString matcherReStr =
+                QString("^%1\\s+[Nn][a-z]*\\.?\\s*%2\\b").arg(tag).arg(number);
+            QRegExp matcherRe(matcherReStr, Qt::CaseInsensitive);
+            std::cerr << "matcherRe: " << matcherReStr.toStdString() << std::endl;
+
+            // (A weaker match on the initials of the tag words only
+            // was tried here and abandoned: it didn't work well.)
+
+            foreach (Work *w, m_works) {
+                if (maxResults > 0 && int(matches.size()) >= maxResults) {
+                    break;
+                }
+                QString name = w->name();
+                if (matcherRe.indexIn(name) >= 0) {
+                    std::cerr << "We quite like: " << name.toStdString() << std::endl;
+                    matches.insert(Guess(titleNumberScore, w));
+                }
+            }
+        }
+    }
+
+    // The set iterates most confident first; copy out at most
+    // maxResults entries
+    int n = 0;
+    for (GuessSet::const_iterator i = matches.begin();
+         i != matches.end(); ++i) {
+        results.push_back(*i);
+        if (maxResults > 0 && ++n >= maxResults) break;
+    }
+
+    return results;
+}
+
+// Construct a matcher over the given works.  The list of Work
+// pointers is stored in m_works for use by match().
+WorkTitleMatcher::WorkTitleMatcher(QList<Work *> wl)
+    : m_works(wl)
+{
+}
+
+GuessList
+WorkTitleMatcher::match(QString text, int maxResults,
+                        float threshold) const
+{
+    GuessList results;
+    GuessSet matches;
+
+    QString quoted;
+    QRegExp quoteRe("\\b[\"']([^\"]+)[\"']\\b");
+    int qthresh = 0;
+    
+    if (quoteRe.indexIn(text) >= 0) {
+        quoted = quoteRe.cap(1);
+        if (quoted.length() < 4) quoted = "";
+        qthresh = quoted.length() / 4;
+    }
+
+    std::cerr << "text = " << text.toStdString() << ", quoted = "
+              << quoted.toStdString() << std::endl;
+
+    EditDistance ed;
+
+    foreach (Work *w, m_works) {
+        if (maxResults > 0 && matches.size() >= maxResults) {
+            break;
+        }
+
+        float score = 0.f;
+        QString name = w->name();
+
+        if (quoted != "") {
+            if (quoteRe.indexIn(name) >= 0) {
+                QString q = quoteRe.cap(1);
+                int dist = ed.calculate(quoted, q, qthresh);
+                if (dist < qthresh) {
+                    std::cerr << "quoted name match: " << q.toStdString() << std::endl;
+                    score += 0.7f;
+                }
+            }
+        }
+
+        if (score > 0.f) {
+            matches.insert(Guess(score, w));
+        }
+    }
+    
+    int n = 0;
+    for (GuessSet::const_iterator i = matches.begin();
+         i != matches.end(); ++i) {
+        results.push_back(*i);
+        if (maxResults > 0 && ++n > maxResults) break;
     }
 
     return results;
--- a/common/Matcher.h	Mon Mar 22 16:41:01 2010 +0000
+++ b/common/Matcher.h	Fri Mar 26 13:53:31 2010 +0000
@@ -3,15 +3,15 @@
 #ifndef _CLASSICAL_DATA_MATCHER_H_
 #define _CLASSICAL_DATA_MATCHER_H_
 
+#include "Objects.h"
+
 #include <dataquay/Uri.h>
 
 #include <QHash>
+#include <set>
 
 namespace ClassicalData {
 
-class Composer;
-class NamedEntity;
-
 class Guess
 {
 public:
@@ -20,11 +20,14 @@
     float confidence() const { return m_confidence; }
     void setConfidence(float c) { m_confidence = c; }
     
-    NamedEntity *entity() { return m_entity; }
+    NamedEntity *entity() const { return m_entity; }
     void setEntity(NamedEntity *e) { m_entity = e; }
 
     bool operator<(const Guess &g) const {
-        return (confidence() > g.confidence());
+        if (confidence() == g.confidence()) {
+            return entity()->uri() < g.entity()->uri();
+        }
+        return (confidence() > g.confidence()); // n.b. most confident first
     }
     
 private:
@@ -33,6 +36,7 @@
 };
 
 typedef QList<Guess> GuessList;
+typedef std::set<Guess> GuessSet;
 
 class Matcher
 {
@@ -76,6 +80,28 @@
     QList<Composer *> m_composers;
 };
 
+/**
+ * Matcher that looks for works by the catalogue or opus numbers
+ * mentioned in a piece of text.
+ */
+class WorkCatalogueMatcher : public Matcher
+{
+public:
+    /// Construct a matcher that searches the given works
+    WorkCatalogueMatcher(QList<Work *> wl);
+
+    /// Return guesses for the works that text appears to refer to
+    virtual GuessList
+    match(QString text, int maxResults, float threshold = 0.f) const;
+
+private:
+    /// The works to search, as passed to the constructor
+    QList<Work *> m_works;
+};
+
+/**
+ * Matcher that looks for works by comparing a quoted title found in
+ * a piece of text with quoted titles in the works' names.
+ */
+class WorkTitleMatcher : public Matcher
+{
+public:
+    /// Construct a matcher that searches the given works
+    WorkTitleMatcher(QList<Work *> wl);
+
+    /// Return guesses for the works that text appears to refer to
+    virtual GuessList
+    match(QString text, int maxResults, float threshold = 0.f) const;
+
+private:
+    /// The works to search, as passed to the constructor
+    QList<Work *> m_works;
+};
+
 }
 
 Q_DECLARE_METATYPE(ClassicalData::Guess*);
--- a/common/Objects.cpp	Mon Mar 22 16:41:01 2010 +0000
+++ b/common/Objects.cpp	Fri Mar 26 13:53:31 2010 +0000
@@ -665,45 +665,60 @@
 Work::compareCatalogueNumberTexts(QString a, QString b)
 {
 //    std::cout << "compare " << a.toStdString()
-//              << " " << b.toStdString() << std::endl;
+//              << " :: " << b.toStdString() << std::endl;
 
     if (a == b) return 0;
 
     if (!a[0].isDigit()) {
-        if (!b[0].isDigit()) {
-            QStringList al = a.split(QRegExp("[ :-]"));
-            QStringList bl = b.split(QRegExp("[ :-]"));
-            if (al.size() < 2 || bl.size() < 2 ||
-                al.size() != bl.size()) {
-                if (a < b) return -1;
-                else if (a > b) return 1;
-                else return 0;
-            }
-            for (int i = 0; i < al.size(); ++i) {
-                if (al[i] != bl[i]) {
-//                    std::cout << "subcompare " << al[i].toStdString()
-//                              << " " << bl[i].toStdString() << std::endl;
-                    return compareCatalogueNumberTexts(al[i], bl[i]);
-                }
-            }
-        } else {
-            return compare(a, b);
-        }
-    } else {
-        if (!b[0].isDigit()) {
-            return compare(a, b);
+        a.replace(QRegExp("^[^\\d]+"), "");
+    }
+
+    if (!b[0].isDigit()) {
+        b.replace(QRegExp("^[^\\d]+"), "");
+    }
+
+    QStringList al = a.split(QRegExp("\\b[^\\d]*"), QString::SkipEmptyParts);
+    QStringList bl = b.split(QRegExp("\\b[^\\d]*"), QString::SkipEmptyParts);
+    if (al.size() != bl.size()) return int(al.size()) - int(bl.size());
+
+/*    if (al.size() < 2 || bl.size() < 2 || al.size() != bl.size()) {
+        if (a < b) return -1;
+        else if (a > b) return 1;
+        else return 0;
+    }
+*/
+    for (int i = 0; i < al.size(); ++i) {
+        if (al[i] != bl[i]) {
+            // use atoi instead of toInt() because we want it to succeed even
+            // if the text is not only an integer (e.g. 35a)
+            int aoi = atoi(al[i].toLocal8Bit().data());
+            int boi = atoi(bl[i].toLocal8Bit().data());
+            if (aoi != boi) return aoi - boi;
+            else return compare(al[i], bl[i]);
         }
     }
-    
-    // use atoi instead of toInt() because we want it to succeed even
-    // if the text is not only an integer (e.g. 35a)
-    int aoi = atoi(a.toLocal8Bit().data());
-    int boi = atoi(b.toLocal8Bit().data());
+    return 0;
+}
 
-//    std::cout << "aoi = " << aoi << ", boi = " << boi << std::endl;
+// Return, in order of appearance, every substring of text that looks
+// like a catalogue identifier ("Op. 27 no 2", "BWV 1066", "K. 21"...)
+// in a form suitable for compareCatalogueNumberTexts.
+QStringList
+Work::extractCatalogueNumberTexts(QString text)
+{
+    //!!! test this
+    QStringList results;
+    std::cerr << "Work::extractCatalogueNumberTexts(" << text.toStdString() << ")" << std::endl;
 
-    if (aoi == boi) return compare(a, b);
-    else return aoi - boi;
+    // Note we explicitly exclude "catalogue identifiers" beginning
+    // with N, because we don't want to treat e.g. "Symphony No. 8"
+    // as catalogue number 8.  What a fine hack.
+
+    QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b");
+//    QRegExp catre("\\b([Oo]pu?s?|[A-Z]+)(\\.\\s*|\\s+)(\\d+\\w*)");
+
+    // Search from offset 0, so that an identifier at the very start
+    // of the text is found too, and resume just past each match
+    int ix = 0;
+    while ((ix = catre.indexIn(text, ix)) >= 0) {
+        std::cerr << "extractCatalogueNumberTexts: found match \"" << catre.cap(0).toStdString() << "\"" << std::endl;
+        results.push_back(catre.cap(0));
+        // always advance by at least one character
+        ix += qMax(1, catre.matchedLength());
+    }
+    return results;
 }
 
 bool
--- a/common/Objects.h	Mon Mar 22 16:41:01 2010 +0000
+++ b/common/Objects.h	Fri Mar 26 13:53:31 2010 +0000
@@ -249,15 +249,19 @@
     QString key() const { return m_key; }
     void setKey(QString n) { m_key = n; emit keyChanged(n); }
 
+    /// Should omit Op prefix, e.g. in opus 102 no 2, this would be "102"
     QString opus() const { return m_opus; }
     void setOpus(QString n) { m_opus = n; emit opusChanged(n); }
 
+    /// For part of a catalogue entry, e.g. in opus 102 no 2, this
+    /// would be "2".  Should normally be used only with partOf
+    QString number() const { return m_number; }
+    void setNumber(QString n) { m_number = n; emit numberChanged(n); }
+
+    /// Including catalogue prefix, e.g. "BWV 1066"; only for non-opus numbers
     QString catalogue() const { return m_catalogue; }
     void setCatalogue(QString n) { m_catalogue = n; emit catalogueChanged(n); }
 
-    QString number() const { return m_number; }
-    void setNumber(QString n) { m_number = n; emit numberChanged(n); }
-
     QSet<Form *> forms() const { return m_forms; }
     void setForms(QSet<Form *> f) { m_forms = f; emit formsChanged(f); }
     void addForm(Form *f) { m_forms.insert(f); emit formsChanged(m_forms); }
@@ -277,6 +281,14 @@
     const Composition *composition() const { return m_composition; }
     void setComposition(Composition *c) { m_composition = c; emit compositionChanged(c); }
 
+    /// The composer of this work's composition, or 0 if the work has
+    /// no composition set
+    Composer *composer() {
+        return m_composition ? m_composition->composer() : 0;
+    }
+
     struct Ordering {
         bool operator()(Work *, Work *);
     };
@@ -290,6 +302,14 @@
     //!!! todo: unit tests
     static int compareCatalogueNumberTexts(QString a, QString b);
 
+    /**
+     * Where data (possibly a title string, including opus number or
+     * equivalent) appears to contain some text of a suitable form for
+     * use with compareCatalogueNumberTexts, extract and return each
+     * example.
+     */
+    static QStringList extractCatalogueNumberTexts(QString data);
+
 signals:
     void keyChanged(QString);
     void opusChanged(QString);
--- a/utilities/track/track.cpp	Mon Mar 22 16:41:01 2010 +0000
+++ b/utilities/track/track.cpp	Fri Mar 26 13:53:31 2010 +0000
@@ -80,6 +80,7 @@
 static QHash<QString, Composer *> composerAliases;
 static QHash<Uri, Composer *> composerUris;
 static QMap<Composer *, QSet<Work *> > worksMap;
+static QList<Work *> allWorks;
 
 void
 show(Composer *c)
@@ -204,7 +205,8 @@
 getTrackData(FileSource source, QString &fingerprint, QString &puid,
              QString &title, QString &maker, AudioFileReader::TagMap &tags)
 {
-    AudioFileReader *reader = AudioFileReaderFactory::createReader(source);
+//    AudioFileReader *reader = AudioFileReaderFactory::createReader(source);
+    AudioFileReader *reader = AudioFileReaderFactory::createThreadingReader(source);
     if (!reader || !reader->isOK()) {
         cerr << "Failed to open audio file" << endl;
         return;
@@ -212,8 +214,8 @@
 
     title = reader->getTitle();
     maker = reader->getMaker();
-    cout << "File tag title: " << reader->getTitle() << endl;
-    cout << "File tag maker: " << reader->getMaker() << endl;
+//    cout << "File tag title: " << reader->getTitle() << endl;
+//    cout << "File tag maker: " << reader->getMaker() << endl;
 
     cout << "All tags:" << endl;
     tags = reader->getTags();
@@ -261,42 +263,175 @@
     if (puid == "" && !features[1].empty()) {
         puid = QString::fromStdString(features[1][0].label);
     }
-    std::cerr << "fingerprint = " << fingerprint << std::endl;
-    std::cerr << "puid = " << puid << std::endl;
+    delete[] input;
+    delete plugin;
+    delete reader;
+}
+
+// Return a multiplier, slightly above 1, that gives a tiny nudge to
+// composers we actually have works for: 0.01 is added for each
+// decimal digit in the number of works recorded for the composer in
+// worksMap.  Anything that is not a composer found in worksMap
+// gets exactly 1.
+float
+bonusFactor(NamedEntity *e)
+{
+    Composer *composer = qobject_cast<Composer *>(e);
+    if (!composer || !worksMap.contains(composer)) {
+        return 1.f;
+    }
+
+    float factor = 1.f;
+    for (int remaining = worksMap[composer].size();
+         remaining > 0; remaining = remaining / 10) {
+        factor += 0.01;
+    }
+    return factor;
+}
+
+// Fold newGuesses into guesses, leaving one guess per entity.
+//
+// Existing guesses are summed per entity, each scaled by
+// bonusFactor().  A new guess for an entity that is already present
+// adds half its confidence; a new guess for an unseen entity is taken
+// at its own confidence.  guesses is then rebuilt from the totals.
+void
+integrateGuesses(GuessSet &guesses, GuessSet newGuesses)
+{
+    typedef QHash<NamedEntity *, float> ConfidenceMap;
+    ConfidenceMap combined;
+
+    for (GuessSet::const_iterator gi = guesses.begin();
+         gi != guesses.end(); ++gi) {
+        NamedEntity *entity = gi->entity();
+        combined[entity] += gi->confidence() * bonusFactor(entity);
+    }
+
+    for (GuessSet::const_iterator gi = newGuesses.begin();
+         gi != newGuesses.end(); ++gi) {
+        NamedEntity *entity = gi->entity();
+        ConfidenceMap::iterator known = combined.find(entity);
+        if (known == combined.end()) {
+            combined.insert(entity, gi->confidence());
+        } else {
+            known.value() += gi->confidence() / 2;
+        }
+    }
+
+    guesses.clear();
+    for (ConfidenceMap::const_iterator ci = combined.constBegin();
+         ci != combined.constEnd(); ++ci) {
+        guesses.insert(Guess(ci.value(), ci.key()));
+    }
+}
+
+// Guess composers from a "maker" string and integrate them into
+// guesses.  A string that is a known composer alias yields a guess of
+// confidence 10 * scale for each composer with that alias; any other
+// string is passed to a ComposerFullTextMatcher over allComposers
+// (at most 5 results, threshold 0.5) and each result's confidence is
+// multiplied by scale.  An empty string does nothing.
+void
+guessFromMaker(QString maker, float scale, GuessSet &guesses)
+{
+    if (maker.isEmpty()) return;
+
+    GuessSet found;
+
+    if (!composerAliases.contains(maker)) {
+        ComposerFullTextMatcher matcher(allComposers);
+        GuessList candidates(matcher.match(maker, 5, 0.5));
+        foreach (Guess candidate, candidates) {
+            found.insert(Guess(candidate.confidence() * scale,
+                               candidate.entity()));
+        }
+    } else {
+        foreach (Composer *c, composerAliases.values(maker)) {
+            found.insert(Guess(10 * scale, c));
+        }
+    }
+
+    integrateGuesses(guesses, found);
+}
+
+// If tags contains the named tag, treat its value as a maker string
+// and guess composers from it (see guessFromMaker).
+void
+guessFromMakerTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses)
+{
+    if (tags.find(tag) == tags.end()) return;
+    guessFromMaker(tags[tag], scale, guesses);
+}
+
+// Guess composers from a title string: the title is split on any of
+// the separators ":,_-" and, if that yields more than one non-empty
+// part, the first part is tried as a maker string.
+void
+guessFromTitle(QString title, float scale, GuessSet &guesses)
+{
+    QRegExp separators("[:,_-]");
+    QStringList parts = title.split(separators, QString::SkipEmptyParts);
+    if (parts.size() < 2) return;
+    guessFromMaker(parts.at(0), scale, guesses);
+}
+
+// If tags contains the named tag, treat its value as a title string
+// and guess composers from it (see guessFromTitle).
+void
+guessFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses)
+{
+    if (tags.find(tag) == tags.end()) return;
+    guessFromTitle(tags[tag], scale, guesses);
+}
+
+// Guess composers from the location of a file.  Digits are removed
+// from both the directory path and the file name; the first word of
+// the last remaining directory component, and the first word of the
+// file name, are then each tried as a maker string.
+void
+guessFromFilename(QString filename, float scale, GuessSet &guesses)
+{
+    cerr << "guessFromFilename: " << filename << endl;
+
+    QRegExp digits("\\d+");
+    QRegExp nonWord("[^\\w]");
+    QFileInfo info(filename);
+
+    QString dirpart = info.path().replace(digits, "");
+    QStringList dirbits = dirpart.split("/", QString::SkipEmptyParts);
+    // The path may have no usable component left (e.g. "/" or an
+    // all-digit directory name), and QList::last() must not be called
+    // on an empty list
+    if (!dirbits.empty()) {
+        dirbits = dirbits.last().split(nonWord, QString::SkipEmptyParts);
+        if (!dirbits.empty()) {
+            guessFromMaker(dirbits.first(), scale, guesses);
+        }
+    }
+
+    QString filepart = info.fileName().replace(digits, "");
+    QStringList filebits = filepart.split(nonWord, QString::SkipEmptyParts);
+    if (!filebits.empty()) {
+        guessFromMaker(filebits.first(), scale, guesses);
+    }
+}
+
+// Guess works from a title string and integrate them into guesses.
+// The title is run through a WorkCatalogueMatcher over allWorks with
+// no result limit, and each result's confidence is multiplied by
+// scale.  An empty title does nothing.
+void
+guessWorkFromTitle(QString title, float scale, GuessSet &guesses)
+{
+    if (title.isEmpty()) return;
+
+    WorkCatalogueMatcher matcher(allWorks);
+    GuessList candidates(matcher.match(title, 0));
+
+    GuessSet found;
+    foreach (Guess candidate, candidates) {
+        found.insert(Guess(candidate.confidence() * scale,
+                           candidate.entity()));
+    }
+
+    integrateGuesses(guesses, found);
+}
+
+// If tags contains the named tag, treat its value as a title string
+// and guess works from it (see guessWorkFromTitle).
+void
+guessWorkFromTitleTag(AudioFileReader::TagMap tags, QString tag, float scale, GuessSet &guesses)
+{
+    if (tags.find(tag) == tags.end()) return;
+    guessWorkFromTitle(tags[tag], scale, guesses);
 }
 
 void
 guess(QString track)
 {
+    cout << endl;
     cout << "Guessing composer for: " << track << endl;
 
-    cerr << "Creating TrackFile object...";
+//    cerr << "Creating TrackFile object...";
     FileSource fs(track);
     TrackFile *tf = new TrackFile(fs);
-    cerr << "done" << endl;
-    cerr << "hash = " << tf->hash() << endl;
+//    cerr << "done" << endl;
+//    cerr << "hash = " << tf->hash() << endl;
 
     QString fingerprint, puid, maker, title;
     AudioFileReader::TagMap tags;
     //!!! bad api!:
     getTrackData(fs, fingerprint, puid, title, maker, tags);
 
-    if (maker != "") {
-        cout << "Name found: " << maker << endl;
-        if (composerAliases.contains(maker)) {
-            QList<Composer *> matching = composerAliases.values(maker);
-            foreach (Composer *c, matching) {
-                cout << "Matched URI: " << c->uri() << endl;
-            }
-        } else {
-            ComposerFullTextMatcher matcher(allComposers);
-            GuessList gl(matcher.match(maker, 5, 0.5));
-            if (!gl.empty()) {
-                foreach (Guess guess, gl) {
-                    cout << "Possibly matched URI (score = " << guess.confidence() << "): " << guess.entity()->uri() << endl;
-                }
-            }
-        }
+    GuessSet guesses;
+
+    guessFromMakerTag(tags, "TCOM", 1.0, guesses);
+    guessFromMakerTag(tags, "COMPOSER", 1.0, guesses);
+
+    if (guesses.empty() || guesses.begin()->confidence() < 0.4) {
+        guessFromMakerTag(tags, "TOPE", 0.8, guesses);
+        guessFromMakerTag(tags, "TPE1", 0.8, guesses);
+
+        guessFromMakerTag(tags, "ARTIST", 0.9, guesses);
+        guessFromMakerTag(tags, "PERFORMER", 0.8, guesses);
+
+        guessFromTitleTag(tags, "TIT1", 0.4, guesses);
+        guessFromTitleTag(tags, "TIT2", 0.5, guesses);
+        guessFromTitleTag(tags, "TALB", 0.5, guesses);
+        guessFromTitleTag(tags, "TIT3", 0.3, guesses);
+
+        guessFromTitleTag(tags, "TITLE", 0.5, guesses);
+        guessFromTitleTag(tags, "ALBUM", 0.5, guesses);
     }
 
     if (tags.find("MUSICBRAINZ_ARTISTID") != tags.end()) {
@@ -304,11 +439,83 @@
         Uri mbzUri = Uri("http://dbtune.org/musicbrainz/resource/artist/" + id);
         cout << "MBZ id found: " << id << endl;
         if (composerUris.contains(mbzUri)) {
-            cout << "Matched URI: " << composerUris[mbzUri]->uri() << endl;
+            guesses.insert(Guess(2.0, composerUris[mbzUri]));
         }
     }
 
+    cerr << "Composer guesses:" << endl;
+    foreach (Guess g, guesses) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    float bc = 0.f;
+    QString best;
+    if (!guesses.empty()) {
+        Guess bg = *guesses.begin();
+        best = bg.entity()->name();
+        bc = bg.confidence();
+    }
+
+    guessFromFilename(track, 0.5, guesses);
+
+    float bc2 = 0.f;
+    QString best2;
+    if (!guesses.empty()) {
+        Guess bg = *guesses.begin();
+        best2 = bg.entity()->name();
+        bc2 = bg.confidence();
+    }
+
+    GuessSet workGuesses;
+    guessWorkFromTitleTag(tags, "TIT1", 0.4, workGuesses);
+    guessWorkFromTitleTag(tags, "TIT2", 0.5, workGuesses);
+    guessWorkFromTitleTag(tags, "TALB", 0.5, workGuesses);
+    guessWorkFromTitleTag(tags, "TIT3", 0.3, workGuesses);
+    guessWorkFromTitleTag(tags, "TITLE", 0.5, workGuesses);
+    guessWorkFromTitleTag(tags, "ALBUM", 0.5, workGuesses);
     
+    cerr << "Work guesses:" << endl;
+    foreach (Guess g, workGuesses) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    GuessSet consistentComposers;
+    GuessSet consistentWorks;
+    foreach (Guess wg, workGuesses) {
+        Work *w = qobject_cast<Work *>(wg.entity());
+        if (!w || !w->composer()) continue;
+        Composer *wc = w->composer();
+        foreach (Guess g, guesses) {
+            if (g.entity() == wc) {
+                consistentComposers.insert(g);
+                consistentWorks.insert(Guess(g.confidence(), wg.entity()));
+            }
+        }
+    }
+
+    cerr << "Consistent composer guesses:" << endl;
+    foreach (Guess g, consistentComposers) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    cerr << "Consistent work guesses:" << endl;
+    foreach (Guess g, consistentWorks) {
+        cerr << "[" << g.confidence() << "] " << g.entity()->uri() << endl;
+    }
+
+    float bc3 = bc2;
+    QString best3 = best2;
+    QString work;
+    if (!consistentComposers.empty()) {
+        Guess bg = *consistentComposers.begin();
+        best3 = bg.entity()->name();
+        bc3 = bg.confidence();
+        //!!! not ok, no guarantee that this will be the same work as
+        //!!! corresponds to the first composer in consistentComposers
+        work = consistentWorks.begin()->entity()->name();
+    }
+
+    cout << track << "|" << best << "|" << bc << "|" << best2 << "|" << bc2 << "|" << best3 << "|" << bc3 << "|" << work << endl;
 }
 
 int
@@ -322,6 +529,28 @@
         args.push_back(argv[i]);
     }
 
+    //!!! unit test!
+    int c = Work::compareCatalogueNumberTexts("Op. 1 no 4", "Op. 3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("1 no 4", "3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("4 no 2", "3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Opus 4 no 2", "3 no 2");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 141", "K. 21");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 14", "K. 21");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 6a", "Op 6");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 6a", "Op 6b");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Op 6a", "Op 7");
+    std::cerr << c << std::endl;
+    c = Work::compareCatalogueNumberTexts("Hob XXIIId:Es1", "Hob XXII:B04");
+    std::cerr << c << std::endl;
+
     BasicStore *store = new BasicStore();
     store->setBaseUri(Uri("http://dbtune.org/classical/resource/"));
     ObjectLoader *loader = new ObjectLoader(store);
@@ -366,6 +595,7 @@
     }
 
     foreach (Work *w, works) {
+        allWorks.push_back(w);
         Composition *c = w->composition();
         if (c) {
             Composer *cp = c->composer();