comparison import/Import.cpp @ 5:d23a4c935a22 classical-rdf

* Update CMN and mbz mappings for new classical archives import
author Chris Cannam
date Fri, 11 Dec 2009 16:10:29 +0000
parents 719a4f477098
children 96bf272e74c5
comparison
equal deleted inserted replaced
4:719a4f477098 5:d23a4c935a22
139 nr.replace("(II)", "II"); 139 nr.replace("(II)", "II");
140 nr.replace("(III)", "III"); 140 nr.replace("(III)", "III");
141 c->addAlias(nr); 141 c->addAlias(nr);
142 } 142 }
143 143
144 QString makeNameKey(QString name)
145 {
146 QString key = name.toLower()
147 .replace("'", "")
148 .replace("x", "ks")
149 .replace("y", "i")
150 .replace("k", "c")
151 .replace("ch", "c")
152 .replace("cc", "c")
153 .replace("v", "f")
154 .replace("ff", "f")
155 .replace("th", "t")
156 .replace("tch", "ch")
157 .replace("er", "r");
158 // DEBUG << "makeNameKey(" << name << "): " << key << endl;
159 return key;
160 }
161
144 bool namesFuzzyMatch(QString an, Composer *b) 162 bool namesFuzzyMatch(QString an, Composer *b)
145 { 163 {
146 // ew! 164 // ew!
147 165
148 QString bn = b->name(); 166 QString bn = b->name();
158 bn.replace(",", ""); 176 bn.replace(",", "");
159 } else { 177 } else {
160 bSurnameIndex = -1; 178 bSurnameIndex = -1;
161 } 179 }
162 QStringList nl = an.split(QRegExp("[ -]")); 180 QStringList nl = an.split(QRegExp("[ -]"));
163 QStringList bnl = bn.split(QRegExp("[ -]")); 181 QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]"));
164 int matchCount = 0; 182 int matchCount = 0;
165 QString surnameMatch = ""; 183 QString surnameMatch = "";
166 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; 184 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1;
167 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; 185 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1;
168 if (nl[aSurnameIndex][0].isUpper() && 186 if (nl[aSurnameIndex][0].isUpper() &&
169 nl[aSurnameIndex] != "Della" && 187 nl[aSurnameIndex] != "Della" &&
170 nl[aSurnameIndex] == bnl[bSurnameIndex]) { 188 makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) {
171 surnameMatch = nl[aSurnameIndex]; 189 surnameMatch = nl[aSurnameIndex];
172 } 190 }
191 int tested = 0;
173 foreach (QString elt, nl) { 192 foreach (QString elt, nl) {
174 if (!elt[0].isUpper() || elt == "Della") continue; 193 if (!elt[0].isUpper() || elt == "Della") continue;
175 if (bnl.contains(elt)) { 194 QString k = makeNameKey(elt);
195 if (bnl.contains(k)) {
176 ++matchCount; 196 ++matchCount;
177 continue; 197 }
178 } 198 if (++tested == 2 && matchCount == 0) {
179 } 199 return false;
180 if (matchCount > 1 && surnameMatch != "") { 200 }
201 }
202 if (surnameMatch != "") {
181 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; 203 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl;
182 return true; 204 if (matchCount > 1) {
205 return true;
206 } else {
207 DEBUG << "(but not enough else matched)" << endl;
208 return false;
209 }
183 } 210 }
184 return false; 211 return false;
185 } 212 }
186 213
187 bool 214 bool
233 } 260 }
234 261
235 QSet<Composer *> matches; 262 QSet<Composer *> matches;
236 263
237 foreach (QString candidateName, allNames) { 264 foreach (QString candidateName, allNames) {
238 QString key = candidateName.toLower(); 265 QString key = makeNameKey(candidateName);
239 if (composers.contains(key)) { 266 if (composers.contains(key)) {
240 foreach (Composer *candidate, composers[key]) { 267 foreach (Composer *candidate, composers[key]) {
241 if (candidateName == dates) { 268 if (candidateName == dates) {
242 if (!namesFuzzyMatch(c->name(), candidate) && 269 if (c->name() == candidate->name()) {
243 !namesFuzzyMatch(candidate->name(), c)) { 270 DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl;
271 } else if (!namesFuzzyMatch(c->name(), candidate) &&
272 !namesFuzzyMatch(candidate->name(), c)) {
244 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; 273 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl;
245 continue; 274 continue;
246 } else { 275 } else {
247 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; 276 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl;
248 } 277 }
259 288
260 if (matches.empty()) { 289 if (matches.empty()) {
261 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; 290 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl;
262 291
263 if (!c->birth() && !c->death()) { 292 if (!c->birth() && !c->death()) {
293 DEBUG << "Composer has no dates, laboriously searching for all names" << endl;
264 // laboriously look for fuzzy match across _all_ composers 294 // laboriously look for fuzzy match across _all_ composers
265 for (ComposerMap::iterator i = composers.begin(); 295 for (ComposerMap::iterator i = composers.begin();
266 i != composers.end(); ++i) { 296 i != composers.end(); ++i) {
267 foreach (Composer *candidate, *i) { 297 foreach (Composer *candidate, *i) {
268 if (namesFuzzyMatch(c->name(), candidate)) { 298 if (namesFuzzyMatch(c->name(), candidate)) {
275 } 305 }
276 } 306 }
277 307
278 if (matches.empty()) { 308 if (matches.empty()) {
279 foreach (QString candidateName, allNames) { 309 foreach (QString candidateName, allNames) {
280 composers[candidateName.toLower()].insert(c); 310 QString key = makeNameKey(candidateName);
311 composers[key].insert(c);
281 DEBUG << "added for alias or date " << candidateName << endl; 312 DEBUG << "added for alias or date " << candidateName << endl;
282 } 313 }
283 return; 314 return;
284 } 315 }
285 } 316 }
296 other->addAlias(other->name()); 327 other->addAlias(other->name());
297 other->setName(c->name()); 328 other->setName(c->name());
298 } else { 329 } else {
299 other->addAlias(c->name()); 330 other->addAlias(c->name());
300 } 331 }
301 composers[c->name().toLower()].insert(other); 332 composers[makeNameKey(c->name())].insert(other);
302 DEBUG << "linking from alias " << c->name() << endl; 333 DEBUG << "linking from alias " << c->name() << endl;
303 334
304 foreach (QString alias, c->aliases()) { 335 foreach (QString alias, c->aliases()) {
305 if (alias != other->name() && 336 if (alias != other->name() &&
306 !other->aliases().contains(alias)) { 337 !other->aliases().contains(alias)) {
307 other->addAlias(alias); 338 other->addAlias(alias);
308 composers[alias.toLower()].insert(other); 339 composers[makeNameKey(alias)].insert(other);
309 DEBUG << "linking from alias " << alias << endl; 340 DEBUG << "linking from alias " << alias << endl;
310 } 341 }
311 } 342 }
312 343
313 foreach (Document *d, c->pages()) { 344 foreach (Document *d, c->pages()) {
751 foreach (Work *w, works) { 782 foreach (Work *w, works) {
752 Composition *cn = w->composition(); 783 Composition *cn = w->composition();
753 if (!cn) continue; 784 if (!cn) continue;
754 if (!cn->composer()) { 785 if (!cn->composer()) {
755 QString cname = cn->composerName(); 786 QString cname = cn->composerName();
787 QString key = makeNameKey(cname);
756 if (cname != "") { 788 if (cname != "") {
757 if (!composers.contains(cname.toLower())) { 789 if (!composers.contains(key)) {
758 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; 790 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
759 } else { 791 } else {
760 QSet<Composer *> cs = composers[cname.toLower()]; 792 QSet<Composer *> cs = composers[key];
761 if (cs.empty()) { 793 if (cs.empty()) {
762 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; 794 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl;
763 } else if (cs.size() > 1) { 795 } else if (cs.size() > 1) {
764 DEBUG << "Failed to assign Composition to composer: " 796 DEBUG << "Failed to assign Composition to composer: "
765 << cs.size() << " composers match name " << cname << endl; 797 << cs.size() << " composers match name " << cname << endl;