To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Revision:

root / import / ImportWikipediaWorksList.cpp

History | View | Annotate | Download (21.3 KB)

1
/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2

    
3
#include "ImportWikipediaWorksList.h"
4

    
5
#include <dataquay/Debug.h>
6

    
7
#include <QFile>
8
#include <QFileInfo>
9
#include <QTextStream>
10
#include <QRegExp>
11
#include <QVariant>
12

    
13
#include <exception>
14

    
15
using namespace Dataquay;
16

    
17
namespace ClassicalData {
18

    
19
void
20
WikipediaWorksListImporter::setSource(QUrl source)
21
{
22
    DEBUG << "WikipediaWorksListImporter::setSource: " << source << endl;
23
    import(source);
24
}
25

    
26
// Tidy up a raw wikitext field and return the cleaned text.
//
// field    - the raw text to clean (taken by value; the caller's copy
//            is not modified)
// linkText - in/out.  If it is empty on entry and the field contains a
//            wiki link, it receives the link target: for a piped link
//            [[target|label]] only when the match starts within the
//            first four characters of the field, for a plain [[target]]
//            link only when the link (optionally preceded by non-letter
//            characters) is at the start of the field.  A non-empty
//            linkText is never overwritten.
//
// Links are replaced by their visible text, Unicode dashes and
// flat/sharp signs are converted to ASCII, wiki quoting and template
// markup are stripped, and stray leading/trailing punctuation removed.
static QString
sanitise(QString field, QString &linkText)
{
    // UTF-8 encodings of U+2012..U+2015 (figure dash, en dash, em dash,
    // horizontal bar): all become a plain ASCII hyphen
    static const char *const dashVariants[] = {
        "\342\200\222", "\342\200\223", "\342\200\224", "\342\200\225"
    };
    const unsigned int dashCount =
        sizeof(dashVariants) / sizeof(dashVariants[0]);
    for (unsigned int i = 0; i < dashCount; ++i) {
        field.replace(QString::fromUtf8(dashVariants[i]), "-");
    }

    // Musical flat (U+266D) and sharp (U+266F) signs
    field.replace(QString::fromUtf8("\342\231\255"), "-flat");
    field.replace(QString::fromUtf8("\342\231\257"), "-sharp");

    // Resolve wiki links one at a time until none of the recognised
    // forms remain.  Piped links are always tried first, from the
    // start of the (already partially rewritten) field.
    QRegExp pipedLink("([^A-Za-z]*)\\[\\[([^\\]\\|]+)\\|([^\\]]+)\\]\\]");
    QRegExp plainLink("^([^A-Za-z]*)\\[\\[([^\\]]+)\\]\\]");

    for (;;) {

        int at = pipedLink.indexIn(field);
        if (at >= 0) {
            if (linkText == "" && at < 4) linkText = pipedLink.cap(2);
            // keep the non-letter prefix and the visible label
            field.replace(at, pipedLink.matchedLength(),
                          pipedLink.cap(1) + pipedLink.cap(3));
            continue;
        }

        at = plainLink.indexIn(field);
        if (at < 0) break;

        if (linkText == "") linkText = plainLink.cap(2);
        // keep the non-letter prefix and the link target as the text
        field.replace(at, plainLink.matchedLength(),
                      plainLink.cap(1) + plainLink.cap(2));
    }

    field = field.trimmed();

    // Remaining wiki markup: brackets, {{templates}}, quote styles and
    // escaped <tags>
    field.replace("[", "");
    field.replace("]", "");
    field.replace(QRegExp("\\{+[^\\}]*\\}+ *"), " ");
    field.replace("'''", "\"");
    field.replace("''", "\"");
    field.replace("&quot;", "\"");
    field.replace("\"\"", "\"");
    field.replace(QRegExp("^[\'\"] (\")?"), "\"");
    field.replace(QRegExp("&lt;[^&]*&gt;"), "");
    field.replace(QRegExp("^\\**"), "");

    if (field.endsWith("c.")) {
        // historical artifact from removal of Bruckner year indication (c. 1856)
        field.chop(2);
    }

    // Drop any run of trailing full stops and commas
    while (field.endsWith(".") || field.endsWith(",")) {
        field.chop(1);
    }

    // Drop a single leading separator character
    if (!field.isEmpty() && QString(";:,-").contains(field.at(0))) {
        field.remove(0, 1);
    }

    // Unwrap a field that is entirely enclosed in parentheses
    if (field.startsWith("(") && field.endsWith(")")) {
        DEBUG << "before: " << field;
        field = field.mid(1, field.length()-2);
        DEBUG << "after: " << field;
    }

    field.replace(QRegExp("^\\**"), "");

    // A lone parenthesis is just debris from the steps above
    if (field == ")" || field == "(") {
        field = "";
    }

    field.replace(" - ,", ",");
    field.replace("  ", " ");

    return field.trimmed();
}
96

    
97
// Return the first run of four consecutive digits found in datefield,
// or an empty string if there is none.
static QString
extractYear(QString datefield)
{
    QRegExp fourDigits("[0-9]{4}");

    if (fourDigits.indexIn(datefield) < 0) {
        return "";
    }

    return fourDigits.cap(0);
}
106

    
107
// Look for a key signature phrase such as "in D minor" or
// "in E-flat major" in titlefield.  Returns the key text without the
// leading "in ", or an empty string if no such phrase is found.
static QString
extractKey(QString titlefield)
{
    QRegExp keyPhrase("in ([A-H]([ -][a-z]+)? (major|minor))");

    const bool found = (keyPhrase.indexIn(titlefield) >= 0);
    if (!found) {
        return "";
    }

    return keyPhrase.cap(1);
}
116

    
117
// Build a Work (and, where needed, its Composition and a Wikipedia
// Document page) from the raw fields extracted from one line of a
// works list.
//
// composerName - composer name to set on any newly created Composition
// opfield      - raw opus/catalogue text; if empty, an opus reference
//                embedded in the title is used instead, and failing
//                that one embedded in the remarks
// numfield     - raw number text; for parts with no number, one is
//                looked for in the title and then the remarks
// partNumber   - used as the work number when no number text is found
//                and this value is greater than zero
// titlefield   - raw title text
// datefield    - raw date text; only the first four-digit run is used
// placefield   - raw place text
// remarksfield - raw remarks text
// main         - the work this one is a part of, or 0 for a top-level
//                work
//
// Returns a newly allocated Work, or 0 if the title looks like a
// "List of ..." entry or contains "http:", or if a top-level work ends
// up with an empty title.
static Work *
makeWork(QString composerName, QString opfield, QString numfield,
         int partNumber, QString titlefield, QString datefield,
         QString placefield, QString remarksfield, Work *main)
{
    if (titlefield.contains("List of ") || titlefield.contains("http:")) return 0;

    // Target of the first wiki link found while sanitising the fields;
    // shared across all sanitise() calls below
    QString linkText;

    Work *w = new Work;

    // An opus reference embedded in the title is removed from the
    // title (and used if no explicit opus field was supplied); one
    // embedded in the remarks is only consulted if the title had none
    QRegExp embeddedOpMatcher("([Oo]pus|[Oo]p.|WAB) (posth[a-z\\.]* *)?([0-9][^ ;:,]*)(,? *([Nn]umber|[Nn]o.|[Nn]r.) ([0-9][^ ;:,]*))?,?");
    if (embeddedOpMatcher.indexIn(titlefield) >= 0) {
        QString opf = embeddedOpMatcher.cap(0);
        if (opfield == "") opfield = opf;
        titlefield.replace(opf, "");
    } else if (embeddedOpMatcher.indexIn(remarksfield) >= 0) {
        opfield = embeddedOpMatcher.cap(0);
    }
    if (main && numfield == "") {
        QRegExp embeddedNumMatcher("(Number|No.|Nr.) ([0-9][^ ;:,]*)");
        if (embeddedNumMatcher.indexIn(titlefield) >= 0) {
            numfield = embeddedNumMatcher.cap(2);
        } else if (embeddedNumMatcher.indexIn(remarksfield) >= 0) {
            numfield = embeddedNumMatcher.cap(2);
        }
    }

    // Anything mentioning "op" or consisting only of digits is taken
    // to be an opus number; anything else is a catalogue reference
    QString op = sanitise(opfield, linkText);
    if (op != "") {
        if (op.toLower().contains("op")) {
            op.replace("Opus ", "");
            op.replace("Op. ", "");
            op.replace("Op.", "");
            op.replace("Op ", "");
            op.replace("opus ", "");
            op.replace("op. ", "");
            op.replace("op.", "");
            op.replace("op ", "");
            w->setOpus(op);
        } else if (QRegExp("^[0-9]*$").indexIn(op) >= 0) {
            w->setOpus(op);
        } else {
            w->setCatalogue(op);
        }
    }

    QString num = sanitise(numfield, linkText);
    if (num != "") {
        num.replace("No. ", "");
        num.replace("No ", "");
        w->setNumber(num);
    } else if (partNumber > 0) {
        w->setNumber(QString("%1").arg(partNumber));
    }

    QString key = extractKey(titlefield);
    if (key != "") {
        w->setKey(key);
    }

    DEBUG << "title before sanitise: " << titlefield << endl;

    remarksfield = remarksfield.trimmed();

    QString title = sanitise(titlefield, linkText);
    title.replace(QRegExp(", which.*$"), "");
    if (linkText != "") {
        // If the title starts with the link target, treat the target
        // as the title proper and whatever follows it as remarks
        if (remarksfield == "" && title.startsWith(linkText)) {
            remarksfield = title.right(title.length() - linkText.length());
            title = linkText;
        }
        linkText.replace(" ", "_");
        QUrl url;
        url.setScheme("http");
        url.setHost("en.wikipedia.org");
        url.setPath("/wiki/" + QUrl::toPercentEncoding(linkText));
        Document *d = new Document;
        d->setUri(Uri(url));
        d->setSiteName("Wikipedia");
        d->setTopic(w);
        w->addPage(d);
    }

    DEBUG << "title after sanitise: " << title << ", link text " << linkText << ", remarks " << remarksfield << endl;

    // "Quoted title" - explanation: a capitalised explanation becomes
    // an alias, otherwise it becomes the remarks (if there are none)
    QRegExp explicationRE("^(\"[^-]+\") - (.+)$");
    if (explicationRE.indexIn(title) >= 0) {
        QString part = explicationRE.cap(2);
        if (part[0].isUpper()) w->addAlias(explicationRE.cap(2));
        else if (remarksfield == "") remarksfield = explicationRE.cap(2);
        title = explicationRE.cap(1);
    }

    // "Quoted title" for ...: the "for ..." text is prepended to the
    // remarks
    QRegExp remarksRE1("^(\"[^-]+\") (for .*)$");
    if (remarksRE1.indexIn(title) >= 0) {
        if (remarksfield != "") {
            remarksfield = QString("%1 - %2")
                .arg(remarksRE1.cap(2)).arg(remarksfield);
        } else {
            remarksfield = remarksRE1.cap(2);
        }
        title = remarksRE1.cap(1);
    }

    // "Quoted title", ...: the text after the comma is prepended to
    // the remarks
    QRegExp remarksRE2("^(\"[^\"]+\"), (.*)$");
    if (remarksRE2.indexIn(title) >= 0) {
        if (remarksfield != "") {
            remarksfield = QString("%1 - %2")
                .arg(remarksRE2.cap(2)).arg(remarksfield);
        } else {
            remarksfield = remarksRE2.cap(2);
        }
        title = remarksRE2.cap(1);
    }

    // ...title" (alias) trailing text
    QRegExp explicationRE2("^([^\\(]*\") \\(([^\\)]*)\\)(.*)$");
    if (explicationRE2.indexIn(title) >= 0) {
        w->addAlias(explicationRE2.cap(2));
        if (remarksfield == "") remarksfield = explicationRE2.cap(3);
        title = explicationRE2.cap(1);
    }

    if (title.startsWith("Song \"")) {
        title = title.right(title.length() - 5);
        w->addForm(Form::getFormByName("song"));
    }
    if (!main && title.startsWith("Song cycle \"")) {
        title = title.right(title.length() - 11);
        w->addForm(Form::getFormByName("song cycle"));
    }
    if (main && main->forms().contains(Form::getFormByName("song cycle"))) {
        w->addForm(Form::getFormByName("song"));
    }

    if (title == "" && !main) {
        // NOTE(review): if a Document was attached to w above, it is
        // not visible here whether deleting w also disposes of it --
        // confirm ownership in Work/Document
        delete w;
        return 0;
    }

    w->setName(title);

    QString remarks = sanitise(remarksfield, linkText);
    if (remarks != "") {
        w->setRemarks(remarks);
    }

    QString year = extractYear(datefield);
    QString place = sanitise(placefield, linkText);

    DEBUG << "title = " << title << endl;

    if (main) {
        main->addPart(w);
        w->setPartOf(main);
        // The parent's composition may be absent (the test below
        // allows for that), so only share it when it exists rather
        // than dereferencing it unconditionally
        if (main->composition()) {
            w->setComposition(main->composition());
            main->composition()->addWork(w);
        }
    }

    // Top-level works, parts whose parent has no composition, and
    // parts with a year differing from the parent's get a composition
    // of their own
    if (!main || !main->composition() ||
        (year != "" && (main->composition()->year() != year.toInt()))) {
        Composition *c = new Composition;
        c->setComposerName(composerName);
        c->addWork(w);
        c->setYear(year.toInt());
        c->setPlace(place);
        w->setComposition(c);
    }

    return w;
}
289

    
290

    
291
void
292
WikipediaWorksListImporter::import(QUrl source)
293
{
294
    //!!! for now
295
    QString filename = source.toLocalFile();
296

    
297
    QFile file(filename);
298
    if (!file.open(QFile::ReadOnly | QFile::Text)) {
299
        throw std::exception();
300
    }
301

    
302
    QTextStream stream(&file);
303
    stream.setCodec("UTF-8");
304
    
305
    QString composerName;
306
    if (filename.contains("K%C3%B6chel")) {
307
        composerName = "Wolfgang Amadeus Mozart";
308
    } else if (filename.contains("/Schubert_")) {
309
        composerName = "Franz Schubert";
310
    } else {
311
        QRegExp byby("by_(.*)_by");
312
        if (byby.indexIn(filename) >= 0) {
313
            composerName = byby.cap(1).replace('_', ' ');
314
        } else {
315
            QRegExp bybr("by_(.*)_\\(");
316
            if (bybr.indexIn(filename) >= 0) {
317
                composerName = bybr.cap(1).replace('_', ' ');
318
            } else {
319
                QRegExp by("by_(.*)");
320
                if (by.indexIn(filename) >= 0) {
321
                    composerName = by.cap(1).replace('_', ' ');
322
                } else {
323
                    QRegExp of("of_([A-Z].*)");
324
                    if (of.indexIn(filename) >= 0) {
325
                        composerName = of.cap(1).replace('_', ' ');
326
                    }
327
                }
328
            }
329
        }
330
    }
331
    composerName = QUrl::fromPercentEncoding(composerName.toLocal8Bit());
332

    
333
    DEBUG << "composerName = " << composerName << endl;
334

    
335

    
336
    // We try to keep these matchers specific enough that we can be
337
    // sure the title field will come out containing _at least_ the
338
    // title.  i.e. the title field should never end up with just the
339
    // opus number or date or whatever, even if the line is formatted
340
    // in a way we hadn't anticipated.  Thus it helps if the title is
341
    // bookended by '' or [[]], etc
342

    
343
    // e.g. Beethoven
344
    // *Opus 84: ''[[Egmont (Beethoven)|Egmont]]'', overture and incidental music (1810)
345
    // opus field - n/a - title - date - n/a - remarks
346
    QRegExp workMatcher1("^\\* *(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:{]*)[:,] *(.*) *\\([^\\)]*([0-9]{4}(-[0-9]+)*)[^0-9\\)]*\\) *(.*)$");
347

    
348
    // e.g. Tchaikovsky
349
    // *'''Op. 19''' 6 Pieces, for piano (1873)
350
    // or Ravel
351
    // * '''1''', Piano Sonata movement (1888), lost
352

    
353
/*
354
    // opus field - n/a - title - date - n/a - remarks
355
    QRegExp workMatcher1a("^\\* *'''(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G)? *[0-9][^ ,:'{]*)'''[:, ] *(.*) *\\([^\\)]*([0-9]{4}(-[0-9]+)*)[^0-9\\)]*\\) *(.*)$");
356
*/
357
    // opus field - n/a - title
358
    QRegExp workMatcher1a("^\\* *'''(([Oo]pus|[Oo]p\\.|WoO|Anh|[A-Z]{1,2})?\\.? *[0-9][^ ,:'{]*),?'''[:, ] *(.*)$");
359

    
360
    // e.g. Copland
361
    // * ''Four Motets'' for mixed voices (1921)
362
    // title - date field
363
    // (no opus)
364
    QRegExp workMatcher2("^\\* *(''.*''\\)?) *(.*)$");
365
    workMatcher2.setMinimal(true); // avoid matching multiple ''...'' substrings
366

    
367
    // e.g. Copland
368
    // * Arrangement of ''Lincoln Portrait'' for concert band (1942)
369
    // or Mendelssohn
370
    // * [[Christe du Lamm Gottes]] (1827), SATB, strings
371
    // title - date field - remarks
372
    // (no opus)
373
    QRegExp workMatcher3("^\\* *([^\\*].*) *\\(([^\\)]*[0-9]{4}[^\\)]*)\\) *(.*)$");
374
    
375
    // e.g. Scriabin
376
    // *[[Sonata No. 2 (Scriabin)|Sonata No. 2 in G sharp minor]], Op. 19  (also known as ''Sonata-Fantasy'')"
377
    // title - opus field - n/a - remarks
378
    QRegExp workMatcher4("^\\* *(\\[\\[.*\\]\\]),* (([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*) *(.*)$");
379

    
380
    // e.g. Scriabin
381
    // *Opus 35: [[Opus 35 (Scriabin)|Three Preludes]]
382
    // opus field - n/a - title - remarks
383
    QRegExp workMatcher5("^\\* *(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*)[:,]* *([\\[']+.*[\\]']+) *(.*)$");
384

    
385
    // e.g. Boccherini
386
    // *G 1: Cello Sonata in F major
387
    // or weird Schubert layout
388
    // * D 505{{nbsp|4}}Adagio in D-flat for Piano
389
    // or Glazunov
390
    // :Op. 67: ''[[The Seasons (ballet)|The Seasons]]'', ballet in one act (1900)
391
    // or even
392
    // ::Op. 77: ''[[Symphony No. 7 (Glazunov)|Symphony No. 7]]'' &quot;Pastorale&quot; in F major (1902-1903)
393
    // This one is a real mess, for really messy pages.  Needs to go near
394
    // the end of the matchers in case it catches something it shouldn't
395
    // n/a - opus field - n/a - n/a - n/a - title
396
    QRegExp workMatcher6("^([\\*:]|::) *(([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*)(([:,]| *\\{+[^\\}]+\\}+) *(.*))?$");
397

    
398
    // e.g. Bruch
399
    // * Adagio appassionato for violin and orchestra in C sharp minor, Op. 57
400
    // title - opus field - date field
401
    QRegExp workMatcher7("^\\* *(.*),? (([Oo]pus|[Oo]p\\.|WoO|Anh|H|D|G) *[0-9][^ ,:'{]*|[Oo]p. posth[a-z.]*) *(\\([^\\)]*([0-9]{4}(-[0-9]+)*)[^0-9\\)]*\\))? *$");
402

    
403
    // e.g. Bruckner
404
    // * Symphony No. 0 in D minor 1869 WAB 100
405
    // title - date field - opus field
406
    QRegExp workMatcher8("^\\* *(.*) ([0-9]{4}[0-9/-]*) *(WAB [0-9][^ ]*)$");
407

    
408
    // e.g. Bach
409
    // * BWV 506 ? Was bist du doch, o Seele, so betruebet
410
    // opus field - title
411
    QRegExp workMatcher9("^\\* *(BWV [^ ]+)(.*)$");
412

    
413
    // Catch-all for things that look at all promising (anything that
414
    // starts with ' or [ after bullet: take the whole as title)
415
    QRegExp workMatcher10("^[\\*:] *((['\\[]|&quot;).*)$");
416

    
417

    
418

    
419
    // e.g. Beethoven
420
    // **No. 1: [[Piano Trio No. 1 (Beethoven)|Piano Trio No. 1]] in E-flat major
421
    // number field - n/a - title, remarks etc
422
    QRegExp partMatcher1("^[\\*:]{2} *((No\\.? *)?[0-9][^ ,:'{]*)[:, ] *(.*)$");
423

    
424
    // e.g. Copland
425
    // ** ''Help us, O Lord''
426
    // title - remarks
427
    QRegExp partMatcher2("^\\*\\* *(''.*'') *(.*)$");
428
    partMatcher2.setMinimal(true); // avoid matching multiple ''...'' substrings
429

    
430
    // e.g. Scriabin
431
    // **[[Mazurka Op. 40 No. 1 (Scriabin)|Mazurka in D flat major]]
432
    // title - remarks
433
    QRegExp partMatcher3("^\\*\\* *(\\[\\[.*\\]\\])(.*)$");
434

    
435
    // e.g. Berlioz
436
    // ** 1: ''Méditation religieuse''
437
    // number - title - remarks
438
    QRegExp partMatcher4("^\\*\\* *([0-9][0-9a-z]*)[\\.: ] *([\\[]*''.*''[\\]]*) *(.*)$");
439

    
440
    // e.g. Tchaikovsky
441
    // **4. Nocturne [???????] (C? minor)
442
    // number - title - remarks
443
    QRegExp partMatcher5("^\\*\\* *([0-9][0-9a-z]*)[\\.: ] *(.*\\[[^\\]]+\\])(.*)$");
444

    
445
    // e.g. Schubert
446
    // **2. &quot;Wohin?&quot;
447
    // n/a - number - title
448
    QRegExp partMatcher6("^\\*\\* *(([0-9][0-9a-z]*)[\\.:])? *((&quot;|'').*)$");
449
   
450
    // e.g. Mendelssohn
451
    // ** Notturno
452
    // title only
453
    QRegExp partMatcher7("^\\*\\* *(.*)$");
454

    
455

    
456
    // Date and remarks within titlefield or remarksfield
457
    QRegExp matcherDate("\\([^\\)]*([0-9]{4})[^0-9\\)]*\\),?(.*)");
458

    
459
    
460
    Work *main = 0;
461
    int partNumber = 0;
462

    
463
    QString line;
464
    QString opfield, numfield, titlefield, remarksfield, datefield;
465

    
466
    while (!stream.atEnd()) {
467

    
468
        if (line == "") {
469
            line = stream.readLine();
470
            DEBUG << "line: " << line << endl;
471
        }
472

    
473
        opfield = "";
474
        numfield = "";
475
        titlefield = "";
476
        datefield = "";
477
        remarksfield = "";
478
        partNumber = 0;
479

    
480
        if (workMatcher1.indexIn(line) >= 0) {
481

    
482
            DEBUG << "matcher 1" << endl;
483
            opfield = workMatcher1.cap(1);
484
            titlefield = workMatcher1.cap(3);
485
            datefield = workMatcher1.cap(4);
486
            remarksfield = workMatcher1.cap(6);
487

    
488
        } else if (workMatcher1a.indexIn(line) >= 0) {
489

    
490
            DEBUG << "matcher 1a" << endl;
491
            opfield = workMatcher1a.cap(1);
492
            titlefield = workMatcher1a.cap(3);
493
/*
494
            datefield = workMatcher1a.cap(4);
495
            remarksfield = workMatcher1a.cap(6);
496
*/
497

    
498
        } else if (workMatcher2.indexIn(line) >= 0) {
499

    
500
            DEBUG << "matcher 2" << endl;
501
            titlefield = workMatcher2.cap(1);
502
            remarksfield = workMatcher2.cap(2);
503
            
504
        } else if (workMatcher3.indexIn(line) >= 0) {
505

    
506
            DEBUG << "matcher 3" << endl;
507
            titlefield = workMatcher3.cap(1);
508
            datefield = workMatcher3.cap(2);
509
            remarksfield = workMatcher3.cap(3);
510

    
511
        } else if (workMatcher4.indexIn(line) >= 0) {
512

    
513
            DEBUG << "matcher 4" << endl;
514
            titlefield = workMatcher4.cap(1);
515
            opfield = workMatcher4.cap(2);
516
            remarksfield = workMatcher4.cap(4);
517

    
518
        } else if (workMatcher5.indexIn(line) >= 0) {
519

    
520
            DEBUG << "matcher 5" << endl;
521
            opfield = workMatcher5.cap(1);
522
            titlefield = workMatcher5.cap(3);
523
            remarksfield = workMatcher5.cap(4);
524

    
525
        } else if (workMatcher6.indexIn(line) >= 0) {
526

    
527
            DEBUG << "matcher 6" << endl;
528
            opfield = workMatcher6.cap(2);
529
            titlefield = workMatcher6.cap(6);
530

    
531
        } else if (workMatcher7.indexIn(line) >= 0) {
532

    
533
            DEBUG << "matcher 7" << endl;
534
            titlefield = workMatcher7.cap(1);
535
            opfield = workMatcher7.cap(2);
536
            datefield = workMatcher7.cap(3);
537

    
538
        } else if (workMatcher8.indexIn(line) >= 0) {
539

    
540
            DEBUG << "matcher 8" << endl;
541
            titlefield = workMatcher8.cap(1);
542
            datefield = workMatcher8.cap(2);
543
            opfield = workMatcher8.cap(3);
544

    
545
        } else if (workMatcher9.indexIn(line) >= 0) {
546

    
547
            DEBUG << "matcher 9" << endl;
548
            opfield = workMatcher9.cap(1);
549
            titlefield = workMatcher9.cap(2);
550

    
551
        } else if (workMatcher10.indexIn(line) >= 0) {
552

    
553
            DEBUG << "matcher 10" << endl;
554
            titlefield = workMatcher10.cap(1);
555

    
556
        } else {
557
            if (line.startsWith("*") || line.startsWith(":")) {
558
                DEBUG << "Failed to match promising works list line: " << line << endl;
559
            }
560
            line = "";
561
            continue;
562
        }
563

    
564
        if (titlefield != "" && datefield == "") {
565
            int dpos;
566
            if ((dpos = matcherDate.indexIn(titlefield)) != -1) {
567
                datefield = matcherDate.cap(1);
568
                remarksfield = matcherDate.cap(2);
569
                titlefield = titlefield.left(dpos);
570
            }
571
        }
572

    
573
        if (remarksfield != "" && datefield == "") {
574
            int dpos;
575
            if ((dpos = matcherDate.indexIn(remarksfield)) != -1) {
576
                datefield = matcherDate.cap(1);
577
                remarksfield = remarksfield.left(dpos);
578
            }
579
        }
580

    
581
        main = makeWork(composerName, opfield, "", 0,
582
                        titlefield, datefield, "", remarksfield, 0);
583

    
584
        if (main) m_objects.push_back(main);
585

    
586
        line = "";
587

    
588
        while (!stream.atEnd()) {
589
            
590
            ++partNumber;
591
            line = stream.readLine();
592
            DEBUG << "line: " << line << endl;
593
            
594
            if (partMatcher1.indexIn(line) >= 0) {
595
                
596
                DEBUG << "part matcher 1" << endl;
597
                numfield = partMatcher1.cap(1);
598
                titlefield = partMatcher1.cap(3);
599
                remarksfield = "";
600

    
601
            } else if (partMatcher2.indexIn(line) >= 0) {
602
                
603
                DEBUG << "part matcher 2" << endl;
604
                titlefield = partMatcher2.cap(1);
605
                remarksfield = partMatcher2.cap(2);
606

    
607
            } else if (partMatcher3.indexIn(line) >= 0) {
608
                
609
                DEBUG << "part matcher 3" << endl;
610
                titlefield = partMatcher3.cap(1);
611
                remarksfield = partMatcher3.cap(2);
612

    
613
            } else if (partMatcher4.indexIn(line) >= 0) {
614
                
615
                DEBUG << "part matcher 4" << endl;
616
                numfield = partMatcher4.cap(1);
617
                titlefield = partMatcher4.cap(2);
618
                remarksfield = partMatcher4.cap(3);
619

    
620
            } else if (partMatcher5.indexIn(line) >= 0) {
621
                
622
                DEBUG << "part matcher 5" << endl;
623
                numfield = partMatcher5.cap(1);
624
                titlefield = partMatcher5.cap(2);
625
                remarksfield = partMatcher5.cap(3);
626

    
627
            } else if (partMatcher6.indexIn(line) >= 0) {
628
                
629
                DEBUG << "part matcher 6" << endl;
630
                numfield = partMatcher6.cap(2);
631
                titlefield = partMatcher6.cap(3);
632

    
633
            } else if (partMatcher7.indexIn(line) >= 0) {
634
                
635
                DEBUG << "part matcher 7" << endl;
636
                titlefield = partMatcher7.cap(1);
637

    
638
            } else {
639
                if (line.startsWith("**") || line.startsWith("::")) {
640
                    DEBUG << "Failed to match promising part line: " << line << endl;
641
                }
642
                break;
643
            }
644

    
645
            if (titlefield != "" && datefield == "") {
646
                int dpos;
647
                if ((dpos = matcherDate.indexIn(titlefield)) != -1) {
648
                    datefield = matcherDate.cap(1);
649
                    remarksfield = matcherDate.cap(2);
650
                    titlefield = titlefield.left(dpos);
651
                }
652
            }
653

    
654
            Work *part = makeWork(composerName, opfield, numfield, partNumber,
655
                                  titlefield, datefield, "", remarksfield,
656
                                  main);
657

    
658
            if (part) m_objects.push_back(part);
659
        }
660
    }        
661

    
662
    DEBUG << "Found " << m_objects.size() << " things" << endl;
663
}
664

    
665

    
666
}
667

    
668