Mercurial > hg > classical
comparison import/Import.cpp @ 10:d35e5d769c87 classical-rdf
* some experiments with composer name matching
author | Chris Cannam |
---|---|
date | Wed, 17 Feb 2010 19:26:48 +0000 |
parents | df999875c53b |
children | c8ef23d3888c |
comparison
equal
deleted
inserted
replaced
9:9e2b203254ab | 10:d35e5d769c87 |
---|---|
139 QString nr = n; | 139 QString nr = n; |
140 nr.replace("(I)", "I"); | 140 nr.replace("(I)", "I"); |
141 nr.replace("(II)", "II"); | 141 nr.replace("(II)", "II"); |
142 nr.replace("(III)", "III"); | 142 nr.replace("(III)", "III"); |
143 c->addAlias(nr); | 143 c->addAlias(nr); |
144 } | |
145 | |
// Reduce a name to a lookup key: lower-case it, then apply a fixed | |
// sequence of substring replacements and return the result. | |
// The replacements run in the order written, each over the output of | |
// the previous one, so e.g. "x" becomes "ks" and then "cs" via the | |
// later "k" -> "c" step. | |
146 QString makeNameKey(QString name) | |
147 { | |
148 QString key = name.toLower() | |
149 .replace("'", "") | |
150 .replace("x", "ks") | |
151 .replace("y", "i") | |
152 .replace("k", "c") | |
153 .replace("ch", "c") | |
154 .replace("cc", "c") | |
155 .replace("v", "f") | |
156 .replace("ff", "f") | |
157 .replace("th", "t") | |
// NOTE(review): "ch" has already been rewritten to "c" above, so a | |
// "tch" sequence will normally have become "tc" before this step | |
// runs; this rule looks like it rarely fires -- confirm the order. | |
158 .replace("tch", "ch") | |
159 .replace("er", "r"); | |
160 // DEBUG << "makeNameKey(" << name << "): " << key << endl; | |
161 return key; | |
162 } | |
163 | |
// Decide whether the name string `an` plausibly refers to composer `b`. | |
// Returns true at once if `an` equals b's name or is one of b's | |
// aliases. Otherwise both names are split on spaces and hyphens and | |
// compared element by element via makeNameKey(); the result is true | |
// only if the surname elements match and more than one element of | |
// `an` is found among the keyed elements of b's name. | |
164 bool namesFuzzyMatch(QString an, Composer *b) | |
165 { | |
166 // ew! | |
167 | |
168 QString bn = b->name(); | |
169 if (bn == an) return true; | |
170 if (b->aliases().contains(an)) return true; | |
// Surname position: a name containing a comma is treated as having | |
// its surname first (index 0, comma removed); otherwise -1 is used | |
// as a placeholder and resolved to the last element after splitting. | |
171 int aSurnameIndex = 0, bSurnameIndex = 0; | |
172 if (an.contains(",")) { | |
173 an.replace(",", ""); | |
174 } else { | |
175 aSurnameIndex = -1; | |
176 } | |
177 if (bn.contains(",")) { | |
178 bn.replace(",", ""); | |
179 } else { | |
180 bSurnameIndex = -1; | |
181 } | |
// `an` is split as-is (its elements are keyed one at a time below, | |
// because their original capitalisation is still needed); b's name | |
// is keyed as a whole first and then split. | |
182 QStringList nl = an.split(QRegExp("[ -]")); | |
183 QStringList bnl = makeNameKey(bn).split(QRegExp("[ -]")); | |
184 int matchCount = 0; | |
185 QString surnameMatch = ""; | |
186 if (aSurnameIndex == -1) aSurnameIndex = nl.size()-1; | |
187 if (bSurnameIndex == -1) bSurnameIndex = bnl.size()-1; | |
// The surname counts as matched only if it starts with an upper-case | |
// character, is not the word "Della", and its key equals b's surname | |
// element. | |
// NOTE(review): split() may produce empty elements (e.g. for | |
// consecutive separators); confirm that indexing [0] on an empty | |
// string is safe with the Qt version in use. | |
188 if (nl[aSurnameIndex][0].isUpper() && | |
189 nl[aSurnameIndex] != "Della" && | |
190 makeNameKey(nl[aSurnameIndex]) == bnl[bSurnameIndex]) { | |
191 surnameMatch = nl[aSurnameIndex]; | |
192 } | |
// Count how many eligible elements of `an` appear among b's keyed | |
// elements. Elements not starting upper-case, and "Della", are | |
// skipped and do not count towards `tested`. | |
193 int tested = 0; | |
194 foreach (QString elt, nl) { | |
195 if (!elt[0].isUpper() || elt == "Della") continue; | |
196 QString k = makeNameKey(elt); | |
197 if (bnl.contains(k)) { | |
198 ++matchCount; | |
199 } | |
// Give up early if neither of the first two eligible elements matched. | |
200 if (++tested == 2 && matchCount == 0) { | |
201 return false; | |
202 } | |
203 } | |
// Accept only when the surname matched and more than one element | |
// matched in total; every other path returns false. | |
204 if (surnameMatch != "") { | |
205 DEBUG << "namesFuzzyMatch: note: surnameMatch = " << surnameMatch << endl; | |
206 if (matchCount > 1) { | |
207 return true; | |
208 } else { | |
209 DEBUG << "(but not enough else matched)" << endl; | |
210 return false; | |
211 } | |
212 } | |
213 return false; | |
214 } | 144 } |
215 | 145 |
216 bool | 146 bool |
217 hasBetterName(Composer *c, Composer *other) | 147 hasBetterName(Composer *c, Composer *other) |
218 { | 148 { |
262 } | 192 } |
263 | 193 |
264 QSet<Composer *> matches; | 194 QSet<Composer *> matches; |
265 | 195 |
266 foreach (QString candidateName, allNames) { | 196 foreach (QString candidateName, allNames) { |
267 QString key = makeNameKey(candidateName); | 197 QString key = Composer::reduceName(candidateName); |
268 if (composers.contains(key)) { | 198 if (composers.contains(key)) { |
269 foreach (Composer *candidate, composers[key]) { | 199 foreach (Composer *candidate, composers[key]) { |
270 if (candidateName == dates) { | 200 if (candidateName == dates) { |
271 if (c->name() == candidate->name()) { | 201 if (c->name() == candidate->name()) { |
272 DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl; | 202 DEBUG << "mergeComposer: Exact name match for " << c->name() << " with date(s) " << dates << endl; |
273 } else if (!namesFuzzyMatch(c->name(), candidate) && | 203 } else if (!candidate->matchCatalogueName(c->name()) && |
274 !namesFuzzyMatch(candidate->name(), c)) { | 204 !c->matchCatalogueName(candidate->name())) { |
275 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; | 205 DEBUG << "mergeComposer: Names differ for " << c->name() << " and " << candidate->name() << " (having matched date(s) " << dates << ")" << endl; |
276 continue; | 206 continue; |
277 } else { | 207 } else { |
278 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; | 208 DEBUG << "mergeComposer: Note: Fuzzy name match for " << c->name() << " and " << candidate->name() << " with date(s) " << dates << endl; |
279 } | 209 } |
280 } else { | 210 } else { |
281 if (!c->datesMatch(candidate)) { | 211 if (!c->matchDates(candidate)) { |
282 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; | 212 DEBUG << "mergeComposer: Dates differ for " << c->name() << " and " << candidate->name() << endl; |
283 continue; | 213 continue; |
284 } | 214 } |
285 } | 215 } |
286 matches.insert(candidate); | 216 matches.insert(candidate); |
295 DEBUG << "Composer has no dates, laboriously searching for all names" << endl; | 225 DEBUG << "Composer has no dates, laboriously searching for all names" << endl; |
296 // laboriously look for fuzzy match across _all_ composers | 226 // laboriously look for fuzzy match across _all_ composers |
297 for (ComposerMap::iterator i = composers.begin(); | 227 for (ComposerMap::iterator i = composers.begin(); |
298 i != composers.end(); ++i) { | 228 i != composers.end(); ++i) { |
299 foreach (Composer *candidate, *i) { | 229 foreach (Composer *candidate, *i) { |
300 if (namesFuzzyMatch(c->name(), candidate)) { | 230 if (candidate->matchCatalogueName(c->name())) { |
301 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; | 231 DEBUG << "mergeComposer: Found fuzzy match for undated composer " << c->name() << " as " << candidate->name() << ", daringly merging" << endl; |
302 matches.insert(candidate); | 232 matches.insert(candidate); |
303 break; | 233 break; |
304 } | 234 } |
305 } | 235 } |
307 } | 237 } |
308 } | 238 } |
309 | 239 |
310 if (matches.empty()) { | 240 if (matches.empty()) { |
311 foreach (QString candidateName, allNames) { | 241 foreach (QString candidateName, allNames) { |
312 QString key = makeNameKey(candidateName); | 242 QString key = Composer::reduceName(candidateName); |
313 composers[key].insert(c); | 243 composers[key].insert(c); |
314 DEBUG << "added for alias or date " << candidateName << endl; | 244 DEBUG << "added for alias or date " << candidateName << endl; |
315 } | 245 } |
316 return; | 246 return; |
317 } | 247 } |
329 other->addAlias(other->name()); | 259 other->addAlias(other->name()); |
330 other->setName(c->name()); | 260 other->setName(c->name()); |
331 } else { | 261 } else { |
332 other->addAlias(c->name()); | 262 other->addAlias(c->name()); |
333 } | 263 } |
334 composers[makeNameKey(c->name())].insert(other); | 264 composers[Composer::reduceName(c->name())].insert(other); |
335 DEBUG << "linking from alias " << c->name() << endl; | 265 DEBUG << "linking from alias " << c->name() << endl; |
336 | 266 |
337 foreach (QString alias, c->aliases()) { | 267 foreach (QString alias, c->aliases()) { |
338 if (alias != other->name() && | 268 if (alias != other->name() && |
339 !other->aliases().contains(alias)) { | 269 !other->aliases().contains(alias)) { |
340 other->addAlias(alias); | 270 other->addAlias(alias); |
341 composers[makeNameKey(alias)].insert(other); | 271 composers[Composer::reduceName(alias)].insert(other); |
342 DEBUG << "linking from alias " << alias << endl; | 272 DEBUG << "linking from alias " << alias << endl; |
343 } | 273 } |
344 } | 274 } |
345 | 275 |
346 foreach (Document *d, c->pages()) { | 276 foreach (Document *d, c->pages()) { |
640 foreach (Work *w, works) { | 570 foreach (Work *w, works) { |
641 Composition *cn = w->composition(); | 571 Composition *cn = w->composition(); |
642 if (!cn) continue; | 572 if (!cn) continue; |
643 if (!cn->composer()) { | 573 if (!cn->composer()) { |
644 QString cname = cn->composerName(); | 574 QString cname = cn->composerName(); |
645 QString key = makeNameKey(cname); | 575 QString key = Composer::reduceName(cname); |
646 if (cname != "") { | 576 if (cname != "") { |
647 if (!composers.contains(key)) { | 577 if (!composers.contains(key)) { |
648 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; | 578 DEBUG << "Failed to assign Composition to composer: no composer matches name " << cname << endl; |
649 } else { | 579 } else { |
650 QSet<Composer *> cs = composers[key]; | 580 QSet<Composer *> cs = composers[key]; |