To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.
root / import / build-database.sh
History | View | Annotate | Download (1.66 KB)
#!/bin/bash

# Rebuild the RDF database of composers, works, etc.
#
# Do not use this script. Once the database is built, we need to be
# able to carry out manual editing, disambiguation etc without risking
# losing our changes in a future rebuild. Following the initial build
# and publication, we really need to ensure that updates can be made
# without a complete rebuild.
#
# Perhaps we will need to make partial rebuild scripts (importing some
# new type of data that was not in the database at all before, for
# example) based on this.
#
# All that said, this script is provided anyway for purposes of review
# and reproducibility.
#
# Usage:   run from the directory that contains imported.ttl, extra/
#          and check/ (all paths below are relative to the current
#          directory).
# Inputs:  imported.ttl, extra/*.ttl (listed below), check/composer-uris
# Outputs: ready.ntriples, check/new-composer-uris
# Needs:   rapper on the PATH.
# Exit:    0 on success, 1 if an input is missing, an output cannot be
#          written, or the composer URI comparison cannot be run.

set -u

# Print a message to stderr and abort the script.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# Base URI handed to rapper for every Turtle source.
readonly BASE_URI="http://dbtune.org/classical/resource/"

# Turtle prefix declarations prepended to every source before parsing.
readonly PREFIXES="extra/prefixes.ttl"

# Turtle sources, converted in this order.
readonly -a SOURCES=(
    imported.ttl
    extra/cmn.ttl
    extra/composer-mappings.ttl
    extra/conductors.ttl
    extra/new-names.ttl
    extra/pianists-dbpedia.ttl
    extra/styles.ttl
)

echo "Running importer, log is written to importer.log"

# NOTE(review): the importer invocation is disabled, so imported.ttl
# (presumably the importer's output -- confirm) must already exist.
#./importer 2>importer.log || exit 1

echo "Assembling additional sources"

# Fail before touching any output if the tool or an input is missing;
# otherwise the result would silently lack a whole source.
command -v rapper >/dev/null || die "rapper not found on PATH"
for input in "$PREFIXES" "${SOURCES[@]}"; do
    [[ -r "$input" ]] || die "Cannot read input file: $input"
done

rm -f ready.ntriples

for ttl in "${SOURCES[@]}"; do
    # A non-zero rapper status is reported but not fatal, because the
    # exit status may also reflect mere warnings (see rapper(1)).
    cat "$PREFIXES" "$ttl" \
        | rapper -i turtle -o ntriples - "$BASE_URI" \
        || printf 'Warning: rapper exited with status %d while converting %s\n' \
            "$?" "$ttl" >&2
done > ready.ntriples

# De-duplicate via a scratch file so ready.ntriples is only replaced by
# a completely written result.
sort ready.ntriples | uniq > ready.2.ntriples \
    || die "Failed to write ready.2.ntriples"
mv ready.2.ntriples ready.ntriples \
    || die "Failed to replace ready.ntriples"

# Extract what follows "composer/" (up to the closing ">") from every
# triple that mentions a composer, skipping .html links and anything
# that still contains "http". The locale is left untouched so that the
# ordering matches the existing check/composer-uris.
grep composer ready.ntriples \
    | grep -F -v .html \
    | sed -e 's/^.*composer\///' -e 's/>.*//' \
    | grep -v http \
    | sort \
    | uniq > check/new-composer-uris \
    || die "Failed to write check/new-composer-uris"

# Held in a variable rather than a predictable file under /tmp.
# diff exits 0 (same), 1 (different) or >1 (trouble).
uri_diff=$(diff -u check/composer-uris check/new-composer-uris)
diff_status=$?

echo "Done, result is in ready.ntriples"
echo

if (( diff_status > 1 )); then
    die "Composer URI comparison failed (diff exit status $diff_status)"
fi

# Drop exactly the two "---"/"+++" file header lines. Filtering by
# pattern would also discard changed entries that begin with "--"/"++".
uri_diff=$(sed '1,2d' <<<"$uri_diff")

# grep -c prints 0 (and returns 1) when nothing matches; that is fine.
added=$(grep -c '^+' <<<"$uri_diff")
removed=$(grep -c '^-' <<<"$uri_diff")

echo "Composer URI comparison:"
echo "Added: $added"
echo "Removed: $removed"