Mercurial > hg > classical
comparison import/Import.cpp @ 5:d23a4c935a22 classical-rdf
* Update CMN and mbz mappings for new classical archives import
author | Chris Cannam |
---|---|
date | Fri, 11 Dec 2009 16:10:29 +0000 |
parents | 719a4f477098 |
children | 96bf272e74c5 |
comparison
equal
deleted
inserted
replaced
4:719a4f477098 | 5:d23a4c935a22 |
---|---|
139 nr.replace("(II)", "II"); | 139 nr.replace("(II)", "II"); |
140 nr.replace("(III)", "III"); | 140 nr.replace("(III)", "III"); |
141 c->addAlias(nr); | 141 c->addAlias(nr); |
142 } | 142 } |
143 | 143 |
| | 144 QString makeNameKey(QString name) |
| | 145 { |
| | 146 QString key = name.toLower() |
| | 147 .replace("'", "") |
| | 148 .replace("x", "ks") |
| | 149 .replace("y", "i") |
| | 150 .replace("k", "c") |
| | 151 .replace("ch", "c") |
| | 152 .replace("cc", "c") |
| | 153 .replace("v", "f") |
| | 154 .replace("ff", "f") |
| | 155 .replace("th", "t") |
| | 156 .replace("tch", "ch") |
| | 157 .replace("er", "r"); |
| | 158 // DEBUG << "makeNameKey(" << name << "): " << key << endl; |
| | 159 return key; |
| | 160 } |
| | 161 |
144 bool namesFuzzyMatch(QString an, Composer *b) | 162 bool namesFuzzyMatch(QString an, Composer *b) |
145 { | 163 { |
146 // ew! | 164 // ew! |
147 | 165 |
148 QString bn = b->name(); | 166 QString bn = b->name(); |
158 bn.replace(",", ""); | 176 bn.replace(",", ""); |
159 } else { | 177 } else { |
160 bSurnameIndex = -1; | 178 bSurnameIndex = -1; |
161 } | 179 } |
162 QStringList nl = an.split(QRegExp("[ -]")); | 180 QStringList nl = an.split(QRegExp("[ -]")); |
163 QStringList bnl = bn.split(QRegExp("[ -]")); | 181 QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]")); |
164 int matchCount = 0; | 182 int matchCount = 0; |
165 QString surnameMatch = ""; | 183 QString surnameMatch = ""; |
166 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; | 184 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; |
167 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; | 185 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; |
168 if (nl[aSurnameIndex][0].isUpper() && | 186 if (nl[aSurnameIndex][0].isUpper() && |
169 nl[aSurnameIndex] != "Della" && | 187 nl[aSurnameIndex] != "Della" && |
170 nl[aSurnameIndex] == bnl[bSurnameIndex]) { | 188 makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) { |
171 surnameMatch = nl[aSurnameIndex]; | 189 surnameMatch = nl[aSurnameIndex]; |
172 } | 190 } |
| | 191 int tested = 0; |
173 foreach (QString elt, nl) { | 192 foreach (QString elt, nl) { |
174 if (!elt[0].isUpper() || elt == "Della") continue; | 193 if (!elt[0].isUpper() || elt == "Della") continue; |
175 if (bnl.contains(elt)) { | 194 QString k = makeNameKey(elt); |
| | 195 if (bnl.contains(k)) { |
176 ++matchCount; | 196 ++matchCount; |
177 continue; | 197 } |
178 } | 198 if (++tested == 2 && matchCount == 0) { |
179 } | 199 return false; |
180 if (matchCount > 1 && surnameMatch != "") { | 200 } |
| | 201 } |
| | 202 if (surnameMatch != "") { |
181 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; | 203 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; |
182 return true; | 204 if (matchCount > 1) { |
| | 205 return true; |
| | 206 } else { |
| | 207 DEBUG << "(but not enough else matched)" << endl; |
| | 208 return false; |
| | 209 } |
183 } | 210 } |
184 return false; | 211 return false; |
185 } | 212 } |
186 | 213 |
187 bool | 214 bool |
233 } | 260 } |
234 | 261 |
235 QSet<Composer *> matches; | 262 QSet<Composer *> matches; |
236 | 263 |
237 foreach (QString candidateName, allNames) { | 264 foreach (QString candidateName, allNames) { |
238 QString key = candidateName.toLower(); | 265 QString key = makeNameKey(candidateName); |
239 if (composers.contains(key)) { | 266 if (composers.contains(key)) { |
240 foreach (Composer *candidate, composers[key]) { | 267 foreach (Composer *candidate, composers[key]) { |
241 if (candidateName == dates) { | 268 if (candidateName == dates) { |
242 if (!namesFuzzyMatch(c->name(), candidate) && | 269 if (c->name() == candidate->name()) { |
243 !namesFuzzyMatch(candidate->name(), c)) { | 270 DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl; |
| | 271 } else if (!namesFuzzyMatch(c->name(), candidate) && |
| | 272 !namesFuzzyMatch(candidate->name(), c)) { |
244 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; | 273 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; |
245 continue; | 274 continue; |
246 } else { | 275 } else { |
247 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; | 276 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; |
248 } | 277 } |
259 | 288 |
260 if (matches.empty()) { | 289 if (matches.empty()) { |
261 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; | 290 DEBUG << "mergeComposer: No existing composer with alias matching any alias of " << c->name() << ", adding" << endl; |
262 | 291 |
263 if (!c->birth() && !c->death()) { | 292 if (!c->birth() && !c->death()) { |
| | 293 DEBUG << "Composer has no dates, laboriously searching for all names" << endl; |
264 // laboriously look for fuzzy match across _all_ composers | 294 // laboriously look for fuzzy match across _all_ composers |
265 for (ComposerMap::iterator i = composers.begin(); | 295 for (ComposerMap::iterator i = composers.begin(); |
266 i != composers.end(); ++i) { | 296 i != composers.end(); ++i) { |
267 foreach (Composer *candidate, *i) { | 297 foreach (Composer *candidate, *i) { |
268 if (namesFuzzyMatch(c->name(), candidate)) { | 298 if (namesFuzzyMatch(c->name(), candidate)) { |
275 } | 305 } |
276 } | 306 } |
277 | 307 |
278 if (matches.empty()) { | 308 if (matches.empty()) { |
279 foreach (QString candidateName, allNames) { | 309 foreach (QString candidateName, allNames) { |
280 composers[candidateName.toLower()].insert(c); | 310 QString key = makeNameKey(candidateName); |
| | 311 composers[key].insert(c); |
281 DEBUG << "added for alias or date " << candidateName << endl; | 312 DEBUG << "added for alias or date " << candidateName << endl; |
282 } | 313 } |
283 return; | 314 return; |
284 } | 315 } |
285 } | 316 } |
296 other->addAlias(other->name()); | 327 other->addAlias(other->name()); |
297 other->setName(c->name()); | 328 other->setName(c->name()); |
298 } else { | 329 } else { |
299 other->addAlias(c->name()); | 330 other->addAlias(c->name()); |
300 } | 331 } |
301 composers[c->name().toLower()].insert(other); | 332 composers[makeNameKey(c->name())].insert(other); |
302 DEBUG << "linking from alias " << c->name() << endl; | 333 DEBUG << "linking from alias " << c->name() << endl; |
303 | 334 |
304 foreach (QString alias, c->aliases()) { | 335 foreach (QString alias, c->aliases()) { |
305 if (alias != other->name() && | 336 if (alias != other->name() && |
306 !other->aliases().contains(alias)) { | 337 !other->aliases().contains(alias)) { |
307 other->addAlias(alias); | 338 other->addAlias(alias); |
308 composers[alias.toLower()].insert(other); | 339 composers[makeNameKey(alias)].insert(other); |
309 DEBUG << "linking from alias " << alias << endl; | 340 DEBUG << "linking from alias " << alias << endl; |
310 } | 341 } |
311 } | 342 } |
312 | 343 |
313 foreach (Document *d, c->pages()) { | 344 foreach (Document *d, c->pages()) { |
751 foreach (Work *w, works) { | 782 foreach (Work *w, works) { |
752 Composition *cn = w->composition(); | 783 Composition *cn = w->composition(); |
753 if (!cn) continue; | 784 if (!cn) continue; |
754 if (!cn->composer()) { | 785 if (!cn->composer()) { |
755 QString cname = cn->composerName(); | 786 QString cname = cn->composerName(); |
| | 787 QString key = makeNameKey(cname); |
756 if (cname != "") { | 788 if (cname != "") { |
757 if (!composers.contains(cname.toLower())) { | 789 if (!composers.contains(key)) { |
758 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | 790 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; |
759 } else { | 791 } else { |
760 QSet<Composer *> cs = composers[cname.toLower()]; | 792 QSet<Composer *> cs = composers[key]; |
761 if (cs.empty()) { | 793 if (cs.empty()) { |
762 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | 794 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; |
763 } else if (cs.size() > 1) { | 795 } else if (cs.size() > 1) { |
764 DEBUG << "Failed to assign Composition to composer: " | 796 DEBUG << "Failed to assign Composition to composer: " |
765 << cs.size() << " composers match name " << cname << endl; | 797 << cs.size() << " composers match name " << cname << endl; |