#!/bin/bash

# Rebuild the RDF database of composers, works, etc.

# Do not use this script. Once the database is built, we need to be
# able to carry out manual editing, disambiguation etc. without risking
# losing our changes in a future rebuild. Following the initial build
# and publication, we really need to ensure that updates can be made
# without a complete rebuild.

# Perhaps we will need to make partial rebuild scripts (importing some
# new type of data that was not in the database at all before, for
# example) based on this.

# All that said, this script is provided anyway for purposes of review
# and reproducibility.

# Stage 1: convert the Turtle sources to N-Triples and merge them into
# ready.ntriples, with duplicate triples removed.

echo "Running importer, log is written to importer.log"

# The importer call is commented out; the loop below still reads
# imported.ttl, so that file has to exist already.
#./importer 2>importer.log || exit 1

echo "Assembling additional sources"

# Start from an empty output file; every source below is appended to it.
rm -f ready.ntriples

# Turtle inputs, converted in this order.
ttl_sources=(
  imported.ttl
  extra/cmn.ttl
  extra/composer-mappings.ttl
  extra/conductors.ttl
  extra/new-names.ttl
  extra/pianists-dbpedia.ttl
  extra/styles.ttl
)

for ttl in "${ttl_sources[@]}"; do
  # Without this check a missing input is only a cat warning, and the
  # database is silently built without that source.
  if [ ! -r extra/prefixes.ttl ] || [ ! -r "$ttl" ]; then
    echo "Cannot read extra/prefixes.ttl or $ttl" >&2
    exit 1
  fi

  # extra/prefixes.ttl is concatenated in front of each source before it
  # is handed to rapper on standard input.
  cat extra/prefixes.ttl "$ttl" \
    | rapper -i turtle -o ntriples - http://dbtune.org/classical/resource/ \
      >> ready.ntriples
  rapper_status=$?

  # NOTE(review): rapper appears to exit with status 2 when it emitted only
  # warnings -- confirm against the rapper manual. That case is reported but
  # tolerated; any other non-zero status aborts the rebuild.
  if [ "$rapper_status" -eq 2 ]; then
    echo "rapper reported warnings for $ttl" >&2
  elif [ "$rapper_status" -ne 0 ]; then
    echo "rapper failed on $ttl (exit status $rapper_status)" >&2
    exit 1
  fi
done

# Remove duplicate triples. pipefail is enabled only inside the subshell so
# that a failing sort is noticed without changing options for the rest of
# the script.
if (set -o pipefail; sort ready.ntriples | uniq > ready.2.ntriples); then
  mv ready.2.ntriples ready.ntriples || exit 1
else
  echo "Failed to de-duplicate ready.ntriples" >&2
  rm -f ready.2.ntriples
  exit 1
fi

# Stage 2: list the composer URI fragments found in ready.ntriples and
# compare them with the recorded list in check/composer-uris.

# Keep lines mentioning "composer" (but not ".html"), strip everything up to
# and including the last "composer/" and everything from the first ">"
# onwards, then drop whatever still contains "http".
grep composer ready.ntriples \
  | grep -F -v .html \
  | sed -e 's/^.*composer\///' -e 's/>.*//' \
  | grep -v http \
  | sort \
  | uniq > check/new-composer-uris

echo "Done, result is in ready.ntriples"

# diff exits 0 for identical files and 1 for differing ones; a higher status
# means the comparison itself failed (for example a missing file), in which
# case printing "0 added, 0 removed" would be misleading.
uri_diff=$(diff -u check/composer-uris check/new-composer-uris)
diff_status=$?
if [ "$diff_status" -gt 1 ]; then
  echo "Could not compare check/composer-uris with check/new-composer-uris" >&2
  exit 1
fi

# Drop the two-line "---"/"+++" file header by position, so that hunk lines
# whose own text begins with "--" or "++" are still counted. The diff is
# held in a variable, so no temporary file is needed.
uri_changes=$(sed '1,2d' <<<"$uri_diff")
added=$(grep -c '^+' <<<"$uri_changes")
removed=$(grep -c '^-' <<<"$uri_changes")

echo
echo "Composer URI comparison:"
echo "Added: $added"
echo "Removed: $removed"