To check out this repository, please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Revision:

root / import / build-database.sh

History | View | Annotate | Download (1.66 KB)

1
#!/bin/bash

# Rebuild the RDF database of composers, works, etc.

# Do not use this script.  Once the database is built, we need to be
# able to carry out manual editing, disambiguation etc without risking
# losing our changes in a future rebuild.  Following the initial build
# and publication, we really need to ensure that updates can be made
# without a complete rebuild.

# Perhaps we will need to make partial rebuild scripts (importing some
# new type of data that was not in the database at all before, for
# example) based on this.

# All that said, this script is provided anyway for purposes of review
# and reproducibility.

# All paths below are relative, so the script has to be run from the
# directory that contains imported.ttl, extra/ and check/.
#
# Outputs:
#   ready.ntriples           - sorted, de-duplicated N-Triples for all sources
#   check/new-composer-uris  - composer URI fragments found in the new build
#   stdout                   - counts of composer URIs added/removed relative
#                              to check/composer-uris

# `set -e` and `pipefail` are deliberately not enabled: grep and diff return
# non-zero for perfectly normal outcomes ("no match", "files differ"), so the
# steps whose failure matters are checked explicitly instead.
set -u

readonly base_uri='http://dbtune.org/classical/resource/'
readonly prefixes='extra/prefixes.ttl'
readonly output='ready.ntriples'
readonly deduped='ready.2.ntriples'

# Print a message to stderr and abort.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

echo "Running importer, log is written to importer.log"

# NOTE: the importer invocation is disabled here (as it was in the script
# this is derived from), so imported.ttl must already exist.
#./importer 2>importer.log || exit 1

echo "Assembling additional sources"

sources=(
    imported.ttl
    extra/cmn.ttl
    extra/composer-mappings.ttl
    extra/conductors.ttl
    extra/new-names.ttl
    extra/pianists-dbpedia.ttl
    extra/styles.ttl
)

# Fail before touching any output if an input is missing, rather than
# silently producing a partial database.
for ttl in "$prefixes" "${sources[@]}"; do
    [[ -r "$ttl" ]] || die "Missing or unreadable input file: $ttl"
done

rm -f -- "$output"

# Each source is converted separately, with the shared prefix declarations
# prepended, and the resulting triples are appended to the output file.
for ttl in "${sources[@]}"; do
    if ! cat -- "$prefixes" "$ttl" \
            | rapper -i turtle -o ntriples - "$base_uri" >> "$output"; then
        printf 'Warning: rapper returned a non-zero status for %s\n' "$ttl" >&2
    fi
done

# sort | uniq is kept (rather than sort -u) so ordering and de-duplication
# behave exactly as they did when check/composer-uris was produced.
sort -- "$output" | uniq > "$deduped" && mv -- "$deduped" "$output"

# Reduce every triple mentioning a composer resource to the bare path
# fragment after "composer/", dropping .html pages and absolute URLs.
grep composer "$output" \
    | grep -F -v .html \
    | sed -e 's/^.*composer\///' -e 's/>.*//' \
    | grep -v http \
    | sort | uniq > check/new-composer-uris

# Use an unpredictable temporary file and remove it on every exit path.
tmpfile=$(mktemp) || die "Failed to create temporary file"
trap 'rm -f -- "$tmpfile"' EXIT

# diff exits 0 (identical) or 1 (different) in normal operation; anything
# greater means the comparison itself failed.
diff -u check/composer-uris check/new-composer-uris > "$tmpfile"
diff_status=$?

echo "Done, result is in ready.ntriples"
echo
echo "Composer URI comparison:"

if (( diff_status > 1 )); then
    die "Could not compare check/composer-uris with check/new-composer-uris"
fi

# Ignore the unified-diff file header lines, then count changed lines.
# grep -c still prints 0 when nothing matches.
added=$(grep -v -e '^---' -e '^+++' "$tmpfile" | grep -c '^+')
removed=$(grep -v -e '^---' -e '^+++' "$tmpfile" | grep -c '^-')

echo "Added: $added"
echo "Removed: $removed"