annotate common/Objects.cpp @ 53:bcea875d8d2f tip

More build fixes
author Chris Cannam
date Thu, 16 Oct 2014 19:03:51 +0100
parents e0e12bd2978d
children
rev   line source
Chris@0 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@0 2
Chris@0 3 #include "Objects.h"
Chris@0 4
Chris@52 5 //#include <dataquay/Debug.h>
Chris@0 6
Chris@0 7 #include <cstdlib>
Chris@0 8 #include <iostream>
Chris@0 9
Chris@11 10 #include "EditDistance.h"
Chris@11 11
Chris@4 12 #include <QHash> // to ensure correct qHash(const QString &) is found
Chris@33 13 #include <QFile>
Chris@33 14 #include <QFileInfo>
Chris@33 15 #include <QCryptographicHash>
Chris@4 16
Chris@0 17 namespace ClassicalData {
Chris@0 18
// Out-of-class definitions of Form's static data members: a map from
// QString keys to Form pointers, and a mutex.
// NOTE(review): presumably m_mutex guards m_map -- confirm against the
// Form declaration, which is not visible in this file.
Chris@0 19 QMap<QString, Form *> Form::m_map;
Chris@0 20 QMutex Form::m_mutex;
Chris@0 21
Chris@37 22 QString
Chris@37 23 Composition::getComposerName() const
Chris@37 24 {
Chris@37 25 if (m_composer) return m_composer->name();
Chris@37 26 return m_cname;
Chris@37 27 }
Chris@37 28
Chris@1 29 bool
Chris@10 30 Composer::matchDates(const Composer *b) const
Chris@1 31 {
Chris@1 32 const Composer *a = this;
Chris@1 33
Chris@1 34 if (a->birth() && b->birth()) {
Chris@1 35 int ay = a->birth()->year(), by = b->birth()->year();
Chris@1 36 if (ay < 1800 || // birth dates before 1700 tend to be vague!
Chris@1 37 a->birth()->approximate() ||
Chris@1 38 b->birth()->approximate()) {
Chris@1 39 if (abs(ay - by) > 25) return false;
Chris@1 40 } else {
Chris@1 41 if (abs(ay - by) > 1) {
Chris@1 42 return false;
Chris@1 43 }
Chris@1 44 }
Chris@1 45 }
Chris@1 46 if (a->death() && b->death()) {
Chris@1 47 int ay = a->death()->year(), by = b->death()->year();
Chris@1 48 if (a->death()->approximate() || b->death()->approximate()) {
Chris@1 49 if (abs(ay - by) > 10) return false;
Chris@1 50 } else if (ay < 1700) {
Chris@1 51 if (abs(ay - by) > 25) return false;
Chris@1 52 } else if (ay < 1800) {
Chris@1 53 // cut a bit of slack, but not as much as for birth date
Chris@1 54 if (abs(ay - by) > 10) return false;
Chris@1 55 } else {
Chris@1 56 if (abs(ay - by) > 1) return false;
Chris@1 57 }
Chris@1 58 }
Chris@1 59 return true;
Chris@1 60 }
Chris@1 61
Chris@11 62 void
Chris@11 63 Composer::cacheNames() const
Chris@11 64 {
Chris@11 65 if (m_namesCached) return;
Chris@11 66
Chris@11 67 QString n = name();
Chris@11 68 QStringList pl = n.split(", ");
Chris@11 69
Chris@11 70 if (pl.size() == 1) {
Chris@11 71 QStringList pl2;
Chris@11 72 pl = n.split(' ');
Chris@11 73 pl2.push_back(pl[pl.size()-1]);
Chris@11 74 pl2.push_back("");
Chris@11 75 for (int i = 0; i+1 < pl.size(); ++i) {
Chris@11 76 if (i > 0) pl2[1] += " ";
Chris@11 77 pl2[1] += pl[i];
Chris@11 78 }
Chris@11 79 pl = pl2;
Chris@11 80 }
Chris@11 81
Chris@11 82 m_surname = pl[0];
Chris@11 83
Chris@11 84 n = "";
Chris@11 85 for (int i = 1; i < pl.size(); ++i) {
Chris@11 86 if (i > 1) n += ", ";
Chris@11 87 n += pl[i];
Chris@11 88 }
Chris@11 89
Chris@11 90 m_forenames = n;
Chris@11 91
Chris@11 92 m_surnameElements.clear();
Chris@11 93 m_connectiveElements.clear();
Chris@11 94 m_forenameElements.clear();
Chris@11 95 m_otherElements.clear();
Chris@11 96 m_reducedSurnameElements.clear();
Chris@11 97 m_reducedForenameElements.clear();
Chris@11 98
Chris@13 99 static QRegExp sre("[\\., -]+");
Chris@13 100
Chris@13 101 foreach (QString s, m_surname.split(sre, QString::SkipEmptyParts)) {
Chris@11 102 if (s[0].isUpper()) {
Chris@11 103 m_surnameElements.push_back(s.toLower());
Chris@11 104 m_reducedSurnameElements.push_back(reduceName(s));
Chris@12 105 } else if (s.length() > 1) {
Chris@11 106 m_connectiveElements.push_back(s.toLower());
Chris@11 107 }
Chris@11 108 }
Chris@11 109
Chris@13 110 foreach (QString s, m_forenames.split(sre, QString::SkipEmptyParts)) {
Chris@11 111 if (s[0].isUpper()) {
Chris@11 112 m_forenameElements.push_back(s.toLower());
Chris@11 113 m_reducedForenameElements.push_back(reduceName(s));
Chris@12 114 } else if (s.length() > 1) {
Chris@11 115 m_connectiveElements.push_back(s.toLower());
Chris@11 116 }
Chris@11 117 }
Chris@11 118
Chris@11 119 foreach (QString a, m_aliases) {
Chris@13 120 foreach (QString ae, a.split(sre, QString::SkipEmptyParts)) {
Chris@11 121 m_otherElements.push_back(ae.toLower());
Chris@11 122 }
Chris@11 123 }
Chris@13 124
Chris@13 125 m_namesCached = true;
Chris@11 126 }
Chris@11 127
Chris@0 128 QString
Chris@0 129 Composer::getSortName(bool caps) const
Chris@0 130 {
Chris@10 131 QString surname = getSurname();
Chris@10 132 QString forenames = getForenames();
Chris@10 133 if (caps) surname = surname.toUpper();
Chris@10 134 if (forenames != "") return surname + ", " + forenames;
Chris@10 135 else return surname;
Chris@10 136 }
Chris@10 137
Chris@10 138 QString
Chris@10 139 Composer::getSurname() const
Chris@10 140 {
Chris@11 141 cacheNames();
Chris@11 142 return m_surname;
Chris@10 143 }
Chris@10 144
Chris@10 145 QString
Chris@10 146 Composer::getForenames() const
Chris@10 147 {
Chris@11 148 cacheNames();
Chris@11 149 return m_forenames;
Chris@0 150 }
Chris@0 151
Chris@0 152 QString
Chris@0 153 Composer::getDisplayDates() const
Chris@0 154 {
Chris@0 155 QString s;
Chris@0 156 if (birth() || death()) {
Chris@0 157 bool showApprox = false;
Chris@0 158 if ((birth() && birth()->approximate()) ||
Chris@0 159 (death() && death()->approximate())) {
Chris@0 160 showApprox = true;
Chris@0 161 }
Chris@0 162 if (birth()) {
Chris@0 163 if (birth()->place() != "") {
Chris@0 164 s += birth()->place() + ", ";
Chris@0 165 }
Chris@0 166 if (showApprox) {
Chris@0 167 s += "c. ";
Chris@0 168 showApprox = false;
Chris@0 169 }
Chris@22 170 s += QString("%1").arg(birth()->year().toInt());
Chris@0 171 }
Chris@0 172 s += "-";
Chris@0 173 if (death()) {
Chris@0 174 if (death()->place() != "") {
Chris@0 175 s += death()->place() + ", ";
Chris@0 176 }
Chris@0 177 if (showApprox) {
Chris@0 178 s += "c. ";
Chris@0 179 showApprox = false;
Chris@0 180 }
Chris@22 181 s += QString("%1").arg(death()->year().toInt());
Chris@0 182 }
Chris@0 183 }
Chris@0 184
Chris@0 185 return s;
Chris@0 186 }
Chris@10 187
Chris@10 188 static QString
Chris@10 189 asciify(QString field)
Chris@10 190 {
Chris@10 191 QString ascii;
Chris@10 192 for (int i = 0; i < field.length(); ++i) {
Chris@10 193 QString dc = field[i].decomposition();
Chris@10 194 if (dc != "") ascii += dc[0];
Chris@10 195 else if (field[i] == QChar(0x00DF)) {
Chris@10 196 ascii += "ss";
Chris@10 197 } else {
Chris@10 198 ascii += field[i];
Chris@10 199 }
Chris@10 200 }
Chris@10 201 ascii.replace(QString::fromUtf8("\342\200\231"), "'"); // apostrophe
Chris@10 202 ascii.replace(QString::fromUtf8("\342\200\222"), "-");
Chris@10 203 ascii.replace(QString::fromUtf8("\342\200\223"), "-");
Chris@10 204 ascii.replace(QString::fromUtf8("\342\200\224"), "-");
Chris@10 205 ascii.replace(QString::fromUtf8("\342\200\225"), "-");
Chris@10 206 return ascii;
Chris@10 207 }
Chris@10 208
Chris@10 209 QString
Chris@10 210 Composer::reduceName(QString name)
Chris@10 211 {
Chris@10 212 QString key = asciify(name).toLower()
Chris@10 213 .replace("'", "")
Chris@10 214 .replace("x", "ks")
Chris@10 215 .replace("y", "i")
Chris@36 216 .replace("ie", "i")
Chris@36 217 .replace("ei", "i")
Chris@36 218 .replace("ii", "i")
Chris@10 219 .replace("k", "c")
Chris@10 220 .replace("aa", "a")
Chris@36 221 .replace("a", "e")
Chris@36 222 .replace("ee", "e")
Chris@10 223 .replace("v", "f")
Chris@36 224 .replace("ph", "f")
Chris@10 225 .replace("ff", "f")
Chris@10 226 .replace("th", "t")
Chris@10 227 .replace("tch", "ch")
Chris@36 228 .replace("ch", "c")
Chris@36 229 .replace("cc", "c")
Chris@10 230 .replace("er", "r");
Chris@10 231 return key;
Chris@10 232 }
Chris@10 233
Chris@10 234 bool
Chris@10 235 Composer::matchCatalogueName(QString an) const
Chris@10 236 {
Chris@10 237 // ew!
Chris@10 238
Chris@10 239 QString bn = name();
Chris@10 240 if (bn == an) return true;
Chris@10 241 if (aliases().contains(an)) return true;
Chris@10 242
Chris@10 243 int aSurnameIndex = 0, bSurnameIndex = 0;
Chris@10 244 if (an.contains(",")) {
Chris@10 245 an.replace(",", "");
Chris@10 246 } else {
Chris@10 247 aSurnameIndex = -1;
Chris@10 248 }
Chris@10 249 if (bn.contains(",")) {
Chris@10 250 bn.replace(",", "");
Chris@10 251 } else {
Chris@10 252 bSurnameIndex = -1;
Chris@10 253 }
Chris@10 254 QStringList nl = an.split(QRegExp("[ -]"));
Chris@10 255 QStringList bnl = reduceName(bn).split(QRegExp("[ -]"));
Chris@10 256 int matchCount = 0;
Chris@10 257 QString surnameMatch = "";
Chris@10 258 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
Chris@10 259 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
Chris@10 260 if (nl[aSurnameIndex][0].isUpper() &&
Chris@10 261 nl[aSurnameIndex] != "Della" &&
Chris@10 262 reduceName(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
Chris@10 263 surnameMatch = nl[aSurnameIndex];
Chris@10 264 }
Chris@10 265 int tested = 0;
Chris@10 266 foreach (QString elt, nl) {
Chris@10 267 if (!elt[0].isUpper() || elt == "Della") continue;
Chris@10 268 QString k = reduceName(elt);
Chris@10 269 if (bnl.contains(k)) {
Chris@10 270 ++matchCount;
Chris@10 271 }
Chris@10 272 if (++tested == 2 && matchCount == 0) {
Chris@10 273 return false;
Chris@10 274 }
Chris@10 275 }
Chris@10 276 if (surnameMatch != "") {
Chris@52 277 // DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
Chris@10 278 if (matchCount > 1) {
Chris@10 279 return true;
Chris@10 280 } else {
Chris@52 281 // DEBUG << "(but not enough else matched)" << endl;
Chris@10 282 return false;
Chris@10 283 }
Chris@10 284 }
Chris@10 285 return false;
Chris@10 286 }
Chris@10 287
Chris@14 288 float
Chris@10 289 Composer::matchFuzzyName(QString n) const
Chris@10 290 {
Chris@13 291 int fameBonus = m_pages.size();
Chris@13 292 if (n == name()) return 100 + fameBonus;
Chris@13 293 static QRegExp sre("[\\., -]+");
Chris@13 294 return matchFuzzyName(n.toLower().split(sre, QString::SkipEmptyParts));
Chris@13 295 }
Chris@13 296
Chris@15 297 static int
Chris@15 298 calculateThresholdedDistance(EditDistance &ed, const QString &user,
Chris@15 299 const QString &machine)
Chris@15 300 {
Chris@15 301 int threshold = machine.length()/3;
Chris@15 302 int dist;
Chris@15 303 if (threshold == 0) dist = (user == machine ? 0 : -1);
Chris@15 304 else {
Chris@15 305 dist = ed.calculate(user, machine, threshold);
Chris@15 306 if (dist > threshold) dist = -1;
Chris@15 307 }
Chris@15 308 return dist;
Chris@15 309 }
Chris@15 310
Chris@14 311 float
Chris@13 312 Composer::matchFuzzyName(QStringList elements) const
Chris@13 313 {
Chris@14 314 if (elements.empty()) return 0;
Chris@14 315
Chris@11 316 cacheNames();
Chris@11 317 int fameBonus = m_pages.size();
Chris@10 318
Chris@28 319 EditDistance ed(EditDistance::RestrictedTransposition);
Chris@10 320
Chris@10 321 int score = 0;
Chris@15 322 bool haveSurname = false;
Chris@15 323
Chris@15 324 // We aim to scale the eventual result such that a score of 1.0 or
Chris@15 325 // more indicates near-certainty that this is a correct match
Chris@15 326 // (i.e. that it is properly matched -- not that it is the only
Chris@15 327 // possible match). To achieve this score, we need to have
Chris@15 328 // matched with reasonable confidence every element in the passed
Chris@15 329 // elements list, and to have matched at least one of them to a
Chris@15 330 // part of our surname.
Chris@15 331
Chris@15 332 int matched = 0;
Chris@15 333 int unmatched = 0;
Chris@10 334
Chris@11 335 foreach (QString elt, elements) {
Chris@10 336
Chris@11 337 bool accept = false;
Chris@11 338
Chris@11 339 if (elt.length() == 1) {
Chris@15 340 // An initial: search forenames only, ignoring
Chris@15 341 // connectives. The score contribution here is low, but
Chris@15 342 // they do not count to matched which means the score can
Chris@15 343 // only enhance whatever happens elsewhere. They can
Chris@15 344 // however seriously damage our score if unmatched, which
Chris@15 345 // is as it should be.
Chris@11 346 foreach (QString s, m_forenameElements) {
Chris@11 347 if (s[0] == elt[0]) {
Chris@15 348 score += 2;
Chris@11 349 accept = true;
Chris@10 350 break;
Chris@10 351 }
Chris@10 352 }
Chris@11 353 if (!accept) {
Chris@15 354 foreach (QString s, m_connectiveElements) {
Chris@15 355 if (s[0] == elt[0]) {
Chris@15 356 score += 1;
Chris@15 357 accept = true;
Chris@15 358 break;
Chris@15 359 }
Chris@15 360 }
Chris@10 361 }
Chris@15 362 if (!accept) {
Chris@15 363 foreach (QString s, m_surnameElements) {
Chris@15 364 if (s[0] == elt[0]) {
Chris@15 365 // no score, but don't call it unmatched
Chris@15 366 accept = true;
Chris@15 367 break;
Chris@15 368 }
Chris@15 369 }
Chris@15 370 }
Chris@15 371 if (!accept) ++unmatched;
Chris@10 372 continue;
Chris@10 373 }
Chris@11 374
Chris@11 375 foreach (QString s, m_surnameElements) {
Chris@15 376 int dist = calculateThresholdedDistance(ed, elt, s);
Chris@15 377 if (dist >= 0) {
Chris@15 378 score += 22 - dist*2;
Chris@15 379 if (elt[0] != s[0]) score -= 10;
Chris@15 380 accept = true;
Chris@13 381 // std::cerr << "[surname: " << s.toStdString() << "]" << std::endl;
Chris@10 382 break;
Chris@10 383 }
Chris@10 384 }
Chris@15 385 if (accept) {
Chris@15 386 haveSurname = true;
Chris@15 387 ++matched;
Chris@15 388 continue;
Chris@15 389 }
Chris@10 390
Chris@11 391 foreach (QString s, m_forenameElements) {
Chris@15 392 int dist = calculateThresholdedDistance(ed, elt, s);
Chris@15 393 if (dist >= 0) {
Chris@15 394 score += 22 - dist*2;
Chris@15 395 if (elt[0] != s[0]) score -= 10;
Chris@15 396 accept = true;
Chris@13 397 // std::cerr << "[forename: " << s.toStdString() << "]" << std::endl;
Chris@10 398 break;
Chris@10 399 }
Chris@10 400 }
Chris@15 401 if (accept) {
Chris@15 402 ++matched;
Chris@15 403 continue;
Chris@15 404 }
Chris@10 405
Chris@11 406 foreach (QString s, m_connectiveElements) {
Chris@15 407 // treated much like initials
Chris@15 408 int dist = calculateThresholdedDistance(ed, elt, s);
Chris@15 409 if (dist == 0) {
Chris@15 410 score += 2;
Chris@15 411 accept = true;
Chris@15 412 } else if (dist == 1) {
Chris@15 413 score += 1;
Chris@15 414 accept = true;
Chris@15 415 }
Chris@11 416 if (accept) {
Chris@13 417 // std::cerr << "[connective: " << s.toStdString() << "]" << std::endl;
Chris@10 418 break;
Chris@10 419 }
Chris@10 420 }
Chris@15 421 if (accept) {
Chris@11 422 continue;
Chris@11 423 }
Chris@11 424
Chris@15 425 QString reduced = reduceName(elt);
Chris@15 426
Chris@16 427 //!!! these don't seem to match often...
Chris@16 428
Chris@15 429 if (m_reducedSurnameElements.contains(reduced)) {
Chris@15 430 score += 10;
Chris@15 431 haveSurname = true;
Chris@15 432 ++matched;
Chris@15 433 std::cerr << "[reduced surname: " << elt.toStdString() << "]" << std::endl;
Chris@15 434 continue;
Chris@15 435 }
Chris@15 436
Chris@15 437 if (m_reducedForenameElements.contains(reduced)) {
Chris@11 438 score += 7;
Chris@15 439 ++matched;
Chris@15 440 std::cerr << "[reduced forename: " << elt.toStdString() << "]" << std::endl;
Chris@11 441 continue;
Chris@11 442 }
Chris@11 443
Chris@11 444 foreach (QString s, m_otherElements) {
Chris@15 445 int dist = calculateThresholdedDistance(ed, elt, s);
Chris@15 446 if (dist >= 0) {
Chris@15 447 score += 22 - dist*2;
Chris@15 448 if (elt[0] != s[0]) score -= 10;
Chris@15 449 accept = true;
Chris@13 450 // std::cerr << "[other: " << s.toStdString() << "]" << std::endl;
Chris@10 451 break;
Chris@10 452 }
Chris@10 453 }
Chris@15 454 if (accept) {
Chris@15 455 ++matched;
Chris@15 456 continue;
Chris@15 457 }
Chris@10 458
Chris@15 459 ++unmatched;
Chris@11 460 }
Chris@15 461
Chris@15 462 // if (fameBonus > 0) std::cerr << "[fame: " << fameBonus << "]" << std::endl;
Chris@15 463 score += fameBonus;
Chris@10 464
Chris@15 465 if (matched == 0) {
Chris@15 466 if (unmatched == 0) {
Chris@15 467 return float(score) / 20.f;
Chris@15 468 } else {
Chris@15 469 return 0;
Chris@15 470 }
Chris@11 471 }
Chris@15 472
Chris@15 473 float fscore = score;
Chris@15 474 float divisor = (matched + unmatched) * 20;
Chris@15 475
Chris@15 476 if (!haveSurname) fscore /= 2;
Chris@15 477 if (unmatched > 0) fscore /= 1.5;
Chris@15 478
Chris@15 479 fscore /= divisor;
Chris@15 480
Chris@15 481 if (matched > 0) {
Chris@15 482 // std::cerr << "[score " << score << " with divisor " << divisor << " for " << name().toStdString() << " adjusted to " << fscore << "]" << std::endl;
Chris@15 483 }
Chris@15 484
Chris@15 485 return fscore;
Chris@10 486 }
Chris@0 487
Chris@16 488 float
Chris@19 489 Composer::matchTyping(QString t) const
Chris@16 490 {
Chris@28 491 return doMatchTyping(t, false);
Chris@28 492 }
Chris@28 493
Chris@28 494 float
Chris@28 495 Composer::matchTypingQuick(QString t) const
Chris@28 496 {
Chris@28 497 return doMatchTyping(t, true);
Chris@28 498 }
Chris@28 499
Chris@28 500 float
Chris@28 501 Composer::doMatchTyping(QString t, bool quick) const
Chris@28 502 {
Chris@19 503 if (t == "") return 0;
Chris@16 504
Chris@16 505 cacheNames();
Chris@28 506 float fameBonus = m_pages.size() / 400.f;
Chris@16 507
Chris@28 508 QString n = name().toLower();
Chris@28 509 t = t.toLower();
Chris@16 510
Chris@19 511 if (n == t) return 1.f + fameBonus;
Chris@19 512 if (n.startsWith(t)) return 0.8f + fameBonus;
Chris@28 513
Chris@28 514 QSet<QString> sl;
Chris@28 515 QSet<QString> nl;
Chris@28 516 foreach (QString s, m_surnameElements) {
Chris@28 517 sl.insert(s.toLower());
Chris@28 518 nl.insert(s.toLower());
Chris@28 519 }
Chris@28 520 foreach (QString s, m_forenameElements) {
Chris@28 521 nl.insert(s.toLower());
Chris@28 522 }
Chris@28 523 if (!quick) {
Chris@28 524 foreach (QString s, m_otherElements) {
Chris@28 525 nl.insert(s.toLower());
Chris@28 526 }
Chris@28 527 foreach (QString s, m_connectiveElements) {
Chris@28 528 nl.insert(s.toLower());
Chris@28 529 }
Chris@28 530 }
Chris@28 531
Chris@28 532 static QRegExp sre("[\\., -]+");
Chris@28 533 QStringList tl = t.split(sre, QString::SkipEmptyParts);
Chris@19 534
Chris@16 535 float score = 0.f;
Chris@16 536
Chris@19 537 if (nl.empty() || tl.empty()) return 0.f;
Chris@19 538
Chris@19 539 int unmatched = 0;
Chris@28 540
Chris@19 541 for (int i = 0; i < tl.size(); ++i) {
Chris@28 542
Chris@28 543 QString tel = tl[i];
Chris@28 544 float component = 0.f;
Chris@28 545 float max = 0.f;
Chris@28 546
Chris@28 547 for (QSet<QString>::const_iterator ni = nl.begin();
Chris@28 548 ni != nl.end(); ++ni) {
Chris@28 549
Chris@28 550 QString nel = ni->toLower();
Chris@28 551
Chris@28 552 if (tel == nel) {
Chris@28 553 if (tel.length() > 1) {
Chris@28 554 component = 0.2;
Chris@19 555 } else {
Chris@28 556 component = 0.1;
Chris@19 557 }
Chris@28 558 if (sl.contains(nel)) component *= 1.5;
Chris@28 559 goto calculated;
Chris@19 560 }
Chris@28 561
Chris@28 562 if (nel.startsWith(tel)) {
Chris@28 563 component = 0.1;
Chris@28 564 if (sl.contains(nel)) component *= 1.5;
Chris@28 565 goto calculated;
Chris@28 566 }
Chris@28 567
Chris@28 568 if (!quick) {
Chris@29 569 if (tel.length() > 3) {
Chris@28 570 EditDistance ed(EditDistance::RestrictedTransposition);
Chris@28 571 int dist = calculateThresholdedDistance
Chris@29 572 (ed, nel.left(tel.length()), tel);
Chris@28 573 if (dist >= 0) {
Chris@28 574 component = 0.08 - dist * 0.01;
Chris@28 575 if (sl.contains(nel)) component *= 1.5;
Chris@28 576 }
Chris@28 577 }
Chris@28 578 if (component > 0.f) goto calculated;
Chris@28 579 }
Chris@28 580
Chris@28 581 if (nel.startsWith(tel[0])) {
Chris@28 582 component += 0.02;
Chris@28 583 }
Chris@28 584
Chris@28 585 calculated:
Chris@28 586 if (component > max) max = component;
Chris@16 587 }
Chris@28 588
Chris@28 589 score += max;
Chris@16 590 }
Chris@16 591
Chris@28 592 if (!quick) {
Chris@28 593 if (t.contains(" ")) {
Chris@28 594 float fuzzyScore = matchFuzzyName(t);
Chris@28 595 if (fuzzyScore >= 0.4f) {
Chris@28 596 score += fuzzyScore / 3.f;
Chris@28 597 }
Chris@19 598 }
Chris@19 599 }
Chris@19 600
Chris@16 601 if (score > 0.f) score += fameBonus;
Chris@16 602 return score;
Chris@16 603 }
Chris@16 604
Chris@24 605 void
Chris@24 606 Composer::mergeFrom(Composer *c)
Chris@24 607 {
Chris@24 608 QSet<QString> allNames = c->aliases();
Chris@25 609 allNames.insert(c->name());
Chris@24 610
Chris@24 611 foreach (QString n, allNames) {
Chris@24 612 if (n != m_name && !m_aliases.contains(n)) {
Chris@24 613 m_aliases.insert(n);
Chris@24 614 m_namesCached = false;
Chris@24 615 }
Chris@24 616 }
Chris@24 617
Chris@24 618 if (!m_birth) {
Chris@31 619 if (c->birth()) {
Chris@31 620 m_birth = new Birth(*c->birth());
Chris@31 621 emit birthChanged(m_birth);
Chris@31 622 }
Chris@24 623 }
Chris@24 624
Chris@24 625 if (!m_death) {
Chris@31 626 if (c->death()) {
Chris@31 627 m_death = new Death(*c->death());
Chris@31 628 emit deathChanged(m_death);
Chris@31 629 }
Chris@24 630 }
Chris@24 631
Chris@24 632 if (c->gender() != "") {
Chris@24 633 if (m_gender == "") {
Chris@24 634 m_gender = c->gender();
Chris@31 635 emit genderChanged(m_gender);
Chris@24 636 } else if (c->gender() != m_gender) {
Chris@24 637 std::cerr << "WARNING: Composer::mergeFrom: Gender mismatch! Composer " << c->name().toStdString() << " has gender " << c->gender().toStdString() << ", but target composer " << m_name.toStdString() << " has gender " << m_gender.toStdString() << std::endl;
Chris@24 638 }
Chris@24 639 }
Chris@24 640
Chris@24 641 m_nationality.unite(c->nationality());
Chris@24 642 m_geonameURIs.unite(c->geonameURIs());
Chris@24 643 m_otherURIs.unite(c->otherURIs());
Chris@26 644
Chris@26 645 foreach (Document *d, c->pages()) {
Chris@38 646 /*
Chris@26 647 Document *dd = new Document;
Chris@26 648 dd->setUri(d->uri());
Chris@38 649 dd->setSiteName(d->siteName());
Chris@26 650 dd->setTopic(this);
Chris@26 651 m_pages.insert(dd);
Chris@38 652 */
Chris@38 653 d->setTopic(this);
Chris@38 654 m_pages.insert(d);
Chris@26 655 }
Chris@24 656
Chris@24 657 if (m_period == "") m_period = c->period();
Chris@24 658 if (m_remarks == "") m_remarks = c->remarks();
Chris@31 659
Chris@31 660 emit nationalityChanged(m_nationality);
Chris@31 661 emit geonameURIsChanged(m_geonameURIs);
Chris@31 662 emit otherURIsChanged(m_otherURIs);
Chris@31 663 emit pagesChanged(m_pages);
Chris@31 664 emit periodChanged(m_period);
Chris@31 665 emit remarksChanged(m_remarks);
Chris@31 666 emit aliasesChanged(m_aliases);
Chris@24 667 }
Chris@24 668
Chris@37 669 QString
Chris@37 670 Work::getComposerName() const
Chris@37 671 {
Chris@37 672 Composer *c = getComposer();
Chris@37 673 if (c) return c->name();
Chris@37 674 else return "";
Chris@37 675 }
Chris@37 676
Chris@0 677 static int
Chris@0 678 compare(QString a, QString b)
Chris@0 679 {
Chris@0 680 if (a < b) {
Chris@0 681 return -1;
Chris@0 682 } else if (a > b) {
Chris@0 683 return 1;
Chris@0 684 } else {
Chris@0 685 return 0;
Chris@0 686 }
Chris@0 687 }
Chris@0 688
Chris@10 689 int
Chris@10 690 Work::compareCatalogueNumberTexts(QString a, QString b)
Chris@0 691 {
Chris@0 692 // std::cout << "compare " << a.toStdString()
Chris@34 693 // << " :: " << b.toStdString() << std::endl;
Chris@0 694
Chris@0 695 if (a == b) return 0;
Chris@0 696
Chris@0 697 if (!a[0].isDigit()) {
Chris@34 698 a.replace(QRegExp("^[^\\d]+"), "");
Chris@34 699 }
Chris@34 700
Chris@34 701 if (!b[0].isDigit()) {
Chris@34 702 b.replace(QRegExp("^[^\\d]+"), "");
Chris@34 703 }
Chris@34 704
Chris@34 705 QStringList al = a.split(QRegExp("\\b[^\\d]*"), QString::SkipEmptyParts);
Chris@34 706 QStringList bl = b.split(QRegExp("\\b[^\\d]*"), QString::SkipEmptyParts);
Chris@34 707 if (al.size() != bl.size()) return int(al.size()) - int(bl.size());
Chris@34 708
Chris@34 709 /* if (al.size() < 2 || bl.size() < 2 || al.size() != bl.size()) {
Chris@34 710 if (a < b) return -1;
Chris@34 711 else if (a > b) return 1;
Chris@34 712 else return 0;
Chris@34 713 }
Chris@34 714 */
Chris@34 715 for (int i = 0; i < al.size(); ++i) {
Chris@34 716 if (al[i] != bl[i]) {
Chris@34 717 // use atoi instead of toInt() because we want it to succeed even
Chris@34 718 // if the text is not only an integer (e.g. 35a)
Chris@34 719 int aoi = atoi(al[i].toLocal8Bit().data());
Chris@34 720 int boi = atoi(bl[i].toLocal8Bit().data());
Chris@34 721 if (aoi != boi) return aoi - boi;
Chris@34 722 else return compare(al[i], bl[i]);
Chris@0 723 }
Chris@0 724 }
Chris@34 725 return 0;
Chris@34 726 }
Chris@0 727
Chris@34 728 QStringList
Chris@34 729 Work::extractCatalogueNumberTexts(QString text)
Chris@34 730 {
Chris@34 731 //!!! test this
Chris@34 732 QStringList results;
Chris@34 733 std::cerr << "Work::extractCatalogueNumberTexts(" << text.toStdString() << ")" << std::endl;
Chris@0 734
Chris@34 735 // Note we explicitly exclude "catalogue identifiers" beginning
Chris@34 736 // with N, because we don't want to treat e.g. "Symphony No. 8"
Chris@34 737 // as catalogue number 8. What a fine hack.
Chris@34 738
Chris@37 739 QRegExp catre("\\b([Oo]pu?s?|[A-MP-Z]+)\\.?[\\s_]*(\\d+\\w*)(\\s+[Nn]([OoRrBb]?|umber)(\\.\\s*|\\s+)(\\d+\\w*))?\\b");
Chris@34 740 int ix = 0;
Chris@34 741 while ((ix = catre.indexIn(text, ix+1)) >= 0) {
Chris@34 742 std::cerr << "extractCatalogueNumberTexts: found match \"" << catre.cap(0).toStdString() << "\"" << std::endl;
Chris@37 743 QString cat = catre.cap(0);
Chris@37 744 // ensure space before digit
Chris@37 745 for (int i = 0; i+1 < cat.length(); ++i) {
Chris@37 746 if (!cat[i].isDigit() && !cat[i].isSpace() && cat[i+1].isDigit()) {
Chris@37 747 QString spaced = cat.left(i+1) + " " + cat.right(cat.length()-i-1);
Chris@37 748 std::cerr << "spaced out from " << cat.toStdString() << " to "
Chris@37 749 << spaced.toStdString() << std::endl;
Chris@37 750 cat = spaced;
Chris@37 751 break;
Chris@37 752 }
Chris@37 753 }
Chris@37 754 results.push_back(cat);
Chris@34 755 }
Chris@34 756 return results;
Chris@0 757 }
Chris@0 758
Chris@0 759 bool
Chris@0 760 Work::Ordering::operator()(Work *a, Work *b)
Chris@0 761 {
Chris@0 762 if (!a) {
Chris@0 763 if (!b) return false;
Chris@0 764 else return true;
Chris@0 765 } else {
Chris@0 766 if (!b) {
Chris@0 767 return false;
Chris@0 768 }
Chris@0 769 }
Chris@0 770 /*
Chris@0 771 QString ao = a->catalogue();
Chris@0 772 if (ao == "") ao = a->opus();
Chris@0 773
Chris@0 774 QString bo = b->catalogue();
Chris@0 775 if (bo == "") bo = b->opus();
Chris@0 776
Chris@0 777 std::cout << "ao " << ao.toStdString() << ", bo " << bo.toStdString() << std::endl;
Chris@0 778 */
Chris@0 779 int c = 0;
Chris@0 780 if (a->catalogue() != "" && b->catalogue() != "") {
Chris@10 781 c = compareCatalogueNumberTexts(a->catalogue(), b->catalogue());
Chris@0 782 }
Chris@0 783 if (c == 0 && a->opus() != "" && b->opus() != "") {
Chris@10 784 c = compareCatalogueNumberTexts(a->opus(), b->opus());
Chris@0 785 }
Chris@0 786 if (c == 0 && a->partOf() == b->partOf() &&
Chris@0 787 a->number() != "" && b->number() != "") {
Chris@10 788 c = compareCatalogueNumberTexts(a->number(), b->number());
Chris@0 789 }
Chris@0 790
Chris@0 791 bool rv = false;
Chris@0 792
Chris@0 793 if (c == 0) {
Chris@0 794 if (a->name() == b->name()) rv = (a < b);
Chris@0 795 else rv = (a->name() < b->name());
Chris@0 796 } else {
Chris@0 797 rv = (c < 0);
Chris@0 798 }
Chris@0 799
Chris@0 800 // std::cout << "result = " << rv << std::endl;
Chris@0 801 return rv;
Chris@0 802 }
Chris@0 803
Chris@37 804 QString
Chris@37 805 Work::getDisplayName() const
Chris@37 806 {
Chris@37 807 QString suffix;
Chris@37 808
Chris@37 809 if (catalogue() != "") {
Chris@37 810 suffix = catalogue();
Chris@37 811 } else if (opus() != "") {
Chris@37 812 suffix = QString("Op. %1").arg(opus());
Chris@37 813 }
Chris@37 814 if (suffix != "" && number() != "") {
Chris@37 815 suffix = QString("%1 no. %2").arg(suffix).arg(number());
Chris@37 816 }
Chris@37 817 if (suffix != "") {
Chris@37 818 if (name() != "") {
Chris@37 819 return QString("%1, %2").arg(name()).arg(suffix);
Chris@37 820 } else {
Chris@37 821 return suffix;
Chris@37 822 }
Chris@37 823 } else {
Chris@37 824 return name();
Chris@37 825 }
Chris@37 826 }
Chris@37 827
Chris@45 828 AudioFile::AudioFile(QObject *parent) :
Chris@45 829 QObject(parent)
Chris@43 830 {
Chris@43 831 }
Chris@43 832
Chris@45 833 AudioFile::AudioFile(FileSource source, QObject *parent) :
Chris@45 834 QObject(parent)
Chris@33 835 {
Chris@33 836 if (source.isAvailable()) {
Chris@33 837 QFile f(source.getLocalFilename());
Chris@33 838 f.open(QIODevice::ReadOnly);
Chris@45 839 //!!! stream this!
Chris@33 840 QByteArray ba = f.readAll();
Chris@52 841 m_hash = QString::fromLatin1
Chris@33 842 (QCryptographicHash::hash(ba, QCryptographicHash::Sha1).toHex());
Chris@33 843 }
Chris@33 844 QString location = source.getLocation();
Chris@33 845 if (source.isRemote()) {
Chris@33 846 m_uri = Dataquay::Uri(location);
Chris@33 847 } else {
Chris@33 848 if (location.contains("://")) {
Chris@33 849 m_uri = Dataquay::Uri(location);
Chris@33 850 } else if (location.startsWith('/')) {
Chris@33 851 m_uri = Dataquay::Uri("file://" + location);
Chris@33 852 } else {
Chris@33 853 m_uri = Dataquay::Uri("file://" + QFileInfo(location).canonicalFilePath());
Chris@33 854 }
Chris@33 855 }
Chris@45 856
Chris@45 857 std::cerr << "AudioFile::AudioFile: hash = " << m_hash.toStdString()
Chris@33 858 << ", uri = " << m_uri.toString().toStdString() << std::endl;
Chris@33 859 }
Chris@33 860
Chris@48 861 AudioFile::~AudioFile()
Chris@48 862 {
Chris@48 863 foreach (AudioFileTag *t, m_tags) delete t;
Chris@48 864 }
Chris@48 865
Chris@48 866 void
Chris@48 867 AudioFile::setTags(QSet<AudioFileTag *> tt)
Chris@48 868 {
Chris@48 869 foreach (AudioFileTag *t, m_tags) {
Chris@48 870 if (!tt.contains(t)) delete t;
Chris@48 871 }
Chris@48 872 m_tags = tt;
Chris@48 873 }
Chris@48 874
Chris@48 875 void
Chris@48 876 AudioFile::addTag(AudioFileTag *t)
Chris@48 877 {
Chris@48 878 m_tags.insert(t);
Chris@48 879 }
Chris@0 880
Chris@0 881 }
Chris@0 882