annotate base/TextMatcher.cpp @ 1854:bde22957545e

Add support for doubling escapes for quotes in quoted texts in CSV-like formats on import (similar to how we, and the relevant RFC, do escaping on export now)
author Chris Cannam
date Mon, 11 May 2020 14:43:58 +0100
parents 48e9f538e6e9
children
rev   line source
Chris@457 1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
Chris@457 2
Chris@457 3 /*
Chris@457 4 Sonic Visualiser
Chris@457 5 An audio file viewer and annotation editor.
Chris@457 6 Centre for Digital Music, Queen Mary, University of London.
Chris@457 7 This file copyright 2008 QMUL.
Chris@457 8
Chris@457 9 This program is free software; you can redistribute it and/or
Chris@457 10 modify it under the terms of the GNU General Public License as
Chris@457 11 published by the Free Software Foundation; either version 2 of the
Chris@457 12 License, or (at your option) any later version. See the file
Chris@457 13 COPYING included with this distribution for more information.
Chris@457 14 */
Chris@457 15
Chris@457 16 #include "TextMatcher.h"
Chris@457 17
Chris@457 18 TextMatcher::TextMatcher()
Chris@457 19 {
Chris@457 20 }
Chris@457 21
Chris@457 22 TextMatcher::~TextMatcher()
Chris@457 23 {
Chris@457 24 }
Chris@457 25
Chris@457 26 void
Chris@457 27 TextMatcher::test(Match &match, QStringList keywords, QString text,
Chris@1429 28 QString textType, int score)
Chris@457 29 {
Chris@457 30 /*
Chris@457 31 if (text.toLower() == keyword.toLower()) {
Chris@457 32 match.score += score * 1.5;
Chris@457 33 match.fragments << tr("%1: <b>%2</b>").arg(textType).arg(text);
Chris@457 34 return;
Chris@457 35 }
Chris@457 36 */
Chris@457 37 int len = text.length();
Chris@457 38 int prevEnd = 0;
Chris@457 39 QString fragment;
Chris@457 40
Chris@457 41 while (1) {
Chris@457 42
Chris@457 43 bool first = (prevEnd == 0);
Chris@457 44
Chris@457 45 int idx = -1;
Chris@457 46 QString keyword;
Chris@457 47
Chris@457 48 for (int ki = 0; ki < keywords.size(); ++ki) {
Chris@457 49 int midx = text.indexOf(keywords[ki], prevEnd, Qt::CaseInsensitive);
Chris@457 50 if (midx >= 0 && midx < len) {
Chris@457 51 if (midx < idx || idx == -1) {
Chris@457 52 idx = midx;
Chris@457 53 keyword = keywords[ki];
Chris@457 54 }
Chris@457 55 }
Chris@457 56 }
Chris@457 57
Chris@457 58 if (idx < 0 || idx >= len) break;
Chris@457 59
Chris@457 60 int klen = keyword.length();
Chris@457 61
Chris@457 62 if (first) {
Chris@457 63 match.score += score;
Chris@457 64 } else {
Chris@457 65 match.score += score / 4;
Chris@457 66 }
Chris@457 67
Chris@457 68 int start = idx;
Chris@457 69 int end = start + klen;
Chris@457 70
Chris@457 71 if (start == 0) match.score += 1;
Chris@457 72 if (end == len) match.score += 1;
Chris@457 73
Chris@457 74 if (start > prevEnd + 14) {
Chris@457 75 QString s = text.right((len - start) + 10);
Chris@457 76 s = XmlExportable::encodeEntities(s.left(10)) + "<b>" +
Chris@457 77 XmlExportable::encodeEntities(s.left(klen + 10).right(klen))
Chris@457 78 + "</b>";
Chris@457 79 fragment += QString("...%1").arg(s);
Chris@457 80 } else {
Chris@457 81 QString s = text.right(len - prevEnd);
Chris@457 82 s = XmlExportable::encodeEntities(s.left(start - prevEnd)) + "<b>" +
Chris@457 83 XmlExportable::encodeEntities(s.left(end - prevEnd).right(klen))
Chris@457 84 + "</b>";
Chris@457 85 fragment += s;
Chris@457 86 }
Chris@457 87
Chris@457 88 prevEnd = end;
Chris@457 89 }
Chris@457 90
Chris@457 91 if (prevEnd > 0 && prevEnd < len) {
Chris@457 92 int n = len - prevEnd;
Chris@457 93 fragment +=
Chris@457 94 XmlExportable::encodeEntities(text.right(n).left(n < 8 ? n : 8));
Chris@457 95 }
Chris@457 96
Chris@457 97 if (fragment != "") {
Chris@457 98 match.fragments[textType] = fragment;
Chris@457 99 }
Chris@457 100 }
Chris@457 101
Chris@457 102 bool
Chris@457 103 TextMatcher::Match::operator<(const Match &m) const
Chris@457 104 {
Chris@457 105 if (score != m.score) {
Chris@457 106 return score < m.score;
Chris@457 107 }
Chris@457 108 if (key != m.key) {
Chris@457 109 return key < m.key;
Chris@457 110 }
Chris@457 111 if (fragments.size() != m.fragments.size()) {
Chris@457 112 return fragments.size() < m.fragments.size();
Chris@457 113 }
Chris@457 114
Chris@457 115 for (FragmentMap::const_iterator
Chris@457 116 i = fragments.begin(),
Chris@457 117 j = m.fragments.begin();
Chris@457 118 i != fragments.end(); ++i, ++j) {
Chris@457 119 if (i->first != j->first) return i->first < j->first;
Chris@457 120 if (i->second != j->second) return i->second < j->second;
Chris@457 121 }
Chris@457 122
Chris@457 123 return false;
Chris@457 124 }