annotate import/build-database.sh @ 53:bcea875d8d2f tip

More build fixes
author Chris Cannam
date Thu, 16 Oct 2014 19:03:51 +0100
parents e856df83c57f
children
rev   line source
#!/bin/bash

# Rebuild the RDF database of composers, works, etc.

# Do not use this script. Once the database is built, we need to be
# able to carry out manual editing, disambiguation etc without risking
# losing our changes in a future rebuild. Following the initial build
# and publication, we really need to ensure that updates can be made
# without a complete rebuild.

# Perhaps we will need to make partial rebuild scripts (importing some
# new type of data that was not in the database at all before, for
# example) based on this.

# All that said, this script is provided anyway for purposes of review
# and reproducibility.

# Files read (relative to the current directory):
#   extra/prefixes.ttl, imported.ttl, the extra/*.ttl files listed
#   below, and check/composer-uris.
# Files written:
#   ready.ntriples (sorted, de-duplicated N-Triples),
#   ready.2.ntriples (intermediate, renamed over ready.ntriples),
#   check/new-composer-uris.
# A summary of added/removed composer URIs is printed to stdout.

echo "Running importer, log is written to importer.log"

# NOTE(review): the importer invocation is commented out, so the message
# above is printed but nothing is run here; imported.ttl is read below
# as-is (presumably left over from an earlier importer run -- confirm).
#./importer 2>importer.log || exit 1

echo "Assembling additional sources"

# Start from scratch: the loop below appends to this file.
rm -f ready.ntriples

# Each Turtle source is converted separately, with the shared prefix
# declarations prepended on stdin so the source need not repeat them.
for ttl in \
    imported.ttl \
    extra/cmn.ttl \
    extra/composer-mappings.ttl \
    extra/conductors.ttl \
    extra/new-names.ttl \
    extra/pianists-dbpedia.ttl \
    extra/styles.ttl ; do
    cat extra/prefixes.ttl "$ttl" \
        | rapper -i turtle -o ntriples - http://dbtune.org/classical/resource/ \
        >> ready.ntriples
done

# Sort and drop duplicate triples; only replace the output if that worked.
sort ready.ntriples | uniq > ready.2.ntriples && mv ready.2.ntriples ready.ntriples

# Extract the part of each URI that follows "composer/" (up to the
# closing '>'), skipping lines that mention .html and any extracted
# value that still contains "http".
grep composer ready.ntriples \
    | grep -F -v .html \
    | sed -e 's/^.*composer\///' -e 's/>.*//' \
    | grep -v http \
    | sort \
    | uniq > check/new-composer-uris

# Scratch file for the diff body. mktemp gives an unpredictable name
# (unlike /tmp/$$) and the EXIT trap removes it on every exit path.
difftmp=$(mktemp "${TMPDIR:-/tmp}/build-database.XXXXXX") || {
    echo "Failed to create temporary file" >&2
    exit 1
}
trap 'rm -f -- "$difftmp"' EXIT

# Keep only the hunk lines of the unified diff, not the ---/+++ file headers.
diff -u check/composer-uris check/new-composer-uris \
    | grep -v -e '^---' -e '^+++' > "$difftmp"

# grep -c prints 0 (and exits non-zero, which is harmless here) when
# nothing matches, so the counts are always numeric.
added=$(grep -c '^+' "$difftmp")
removed=$(grep -c '^-' "$difftmp")

echo "Done, result is in ready.ntriples"
echo
echo "Composer URI comparison:"
echo "Added: $added"
echo "Removed: $removed"