Mercurial > hg > classical
view import/build-database.sh @ 53:bcea875d8d2f tip
More build fixes
author | Chris Cannam |
---|---|
date | Thu, 16 Oct 2014 19:03:51 +0100 |
parents | e856df83c57f |
children |
line wrap: on
line source
#!/bin/bash

# Rebuild the RDF database of composers, works, etc.

# Do not use this script. Once the database is built, we need to be
# able to carry out manual editing, disambiguation etc without risking
# losing our changes in a future rebuild. Following the initial build
# and publication, we really need to ensure that updates can be made
# without a complete rebuild.

# Perhaps we will need to make partial rebuild scripts (importing some
# new type of data that was not in the database at all before, for
# example) based on this.

# All that said, this script is provided anyway for purposes of review
# and reproducibility.

# Base URI handed to rapper when parsing the Turtle sources.
base_uri="http://dbtune.org/classical/resource/"

# Turtle files converted to N-Triples, in this order.
sources=(
  imported.ttl
  extra/cmn.ttl
  extra/composer-mappings.ttl
  extra/conductors.ttl
  extra/new-names.ttl
  extra/pianists-dbpedia.ttl
  extra/styles.ttl
)

# Scratch file for the filtered composer-URI diff. Created with mktemp
# rather than a predictable /tmp/<pid> name, and removed on any exit path.
diff_file=$(mktemp) || exit 1
trap 'rm -f -- "$diff_file"' EXIT

echo "Running importer, log is written to importer.log"
# NOTE(review): the importer call below is disabled, so imported.ttl must
# already exist (presumably from an earlier importer run -- confirm).
#./importer 2>importer.log || exit 1

echo "Assembling additional sources"
rm -f ready.ntriples
for ttl in "${sources[@]}"; do
  # Each source is parsed with the shared prefix declarations prepended.
  cat extra/prefixes.ttl "$ttl" \
    | rapper -i turtle -o ntriples - "$base_uri" >> ready.ntriples
done

# De-duplicate triples; only replace the file if the sort/uniq step succeeded.
sort ready.ntriples | uniq > ready.2.ntriples \
  && mv ready.2.ntriples ready.ntriples

# Extract the bare composer identifiers: keep lines mentioning "composer",
# drop .html references, strip everything up to "composer/" and everything
# from the closing ">" onwards, then discard leftovers still containing "http".
grep composer ready.ntriples \
  | grep -F -v '.html' \
  | sed 's/^.*composer\///' \
  | sed 's/>.*//' \
  | grep -v http \
  | sort \
  | uniq > check/new-composer-uris

# Compare against the reference list, dropping the ---/+++ diff header lines
# so that only real additions and removals begin with + or -.
diff -u check/composer-uris check/new-composer-uris \
  | grep -v '^---' \
  | grep -v '^+++' > "$diff_file"

added=$(grep -c '^+' "$diff_file")
removed=$(grep -c '^-' "$diff_file")

echo "Done, result is in ready.ntriples"
echo
echo "Composer URI comparison:"
echo "Added: $added"
echo "Removed: $removed"