view import/build-database.sh @ 53:bcea875d8d2f tip

More build fixes
author Chris Cannam
date Thu, 16 Oct 2014 19:03:51 +0100
parents e856df83c57f
children
line wrap: on
line source
#!/bin/bash

# Rebuild the RDF database of composers, works, etc.

# Do not use this script.  Once the database is built, we need to be
# able to carry out manual editing, disambiguation etc without risking
# losing our changes in a future rebuild.  Following the initial build
# and publication, we really need to ensure that updates can be made
# without a complete rebuild.

# Perhaps we will need to make partial rebuild scripts (importing some
# new type of data that was not in the database at all before, for
# example) based on this.

# All that said, this script is provided anyway for purposes of review
# and reproducibility.

# Announce the import step.
# NOTE(review): the importer invocation below is commented out, so at
# present nothing is run and importer.log is not written, despite what
# the message says -- confirm whether the call should be re-enabled.
printf '%s\n' "Running importer, log is written to importer.log"

#./importer 2>importer.log || exit 1

echo "Assembling additional sources"

# The loop below appends to ready.ntriples, so start from a clean slate.
rm -f ready.ntriples

# Convert each Turtle source to N-Triples and append it to ready.ntriples.
# extra/prefixes.ttl is prepended to every source before parsing
# (presumably it holds the shared @prefix declarations -- confirm).
for ttl in \
    imported.ttl \
    extra/cmn.ttl \
    extra/composer-mappings.ttl \
    extra/conductors.ttl \
    extra/new-names.ttl \
    extra/pianists-dbpedia.ttl \
    extra/styles.ttl ; do
    # cat's exit status is hidden by the pipeline, so an unreadable input
    # would otherwise be dropped silently; check before converting.
    for input in extra/prefixes.ttl "$ttl" ; do
        if [ ! -r "$input" ]; then
            echo "Cannot read $input, giving up" >&2
            exit 1
        fi
    done
    # "-" makes rapper read the document from stdin; the URI that follows
    # is passed as the base URI for the parsed document.
    cat extra/prefixes.ttl "$ttl" | \
        rapper -i turtle -o ntriples - http://dbtune.org/classical/resource/ \
        >> ready.ntriples || {
        # A failed conversion would leave ready.ntriples incomplete, so
        # stop here rather than carry on to report success.
        echo "rapper failed on $ttl, giving up" >&2
        exit 1
    }
done

# Sort the collected triples and drop exact duplicates.  The result is
# written to a scratch file first and only moved over ready.ntriples if
# the pipeline reports success.
if sort ready.ntriples | uniq > ready.2.ntriples; then
    mv ready.2.ntriples ready.ntriples
fi

# Print the sorted, de-duplicated composer identifiers found in the
# N-Triples file $1.
#
# A line is considered only if it contains "composer" and does not
# contain ".html".  Everything up to and including the last "composer/"
# is removed, then everything from the first ">" onwards.  Results that
# still contain "http" (lines that had no "composer/" path segment to
# strip) are discarded.
extract_composer_ids() {
    # grep -F replaces the obsolescent fgrep, which newer GNU grep
    # releases warn about on every invocation.
    grep composer "$1" \
        | grep -F -v .html \
        | sed -e 's/^.*composer\///' -e 's/>.*//' \
        | grep -v http \
        | sort \
        | uniq
}

extract_composer_ids ready.ntriples > check/new-composer-uris

# Compare the URI list in file $1 (old) with the one in file $2 (new)
# and set the global variables:
#   added   - number of lines present only in $2
#   removed - number of lines present only in $1
#
# The diff is held in a variable rather than in a predictably named file
# under /tmp, so there is nothing to clean up and nothing for another
# user to pre-create.
compare_uri_lists() {
    local diff_output diff_status changes

    diff_output=$(diff -u "$1" "$2")
    diff_status=$?
    # diff exits 0 (identical) or 1 (different) on success; anything
    # higher means it could not do the comparison at all.
    if [ "$diff_status" -gt 1 ]; then
        echo "Warning: diff failed (exit status $diff_status), counts below are not meaningful" >&2
    fi

    # Drop the two-line "---"/"+++" file header by position.  Filtering
    # on the text instead would also discard a genuine entry that starts
    # with "--" or "++".
    changes=$(printf '%s\n' "$diff_output" | sed '1,2d')

    added=$(printf '%s\n' "$changes" | grep -c '^+')
    removed=$(printf '%s\n' "$changes" | grep -c '^-')
}

compare_uri_lists check/composer-uris check/new-composer-uris

echo "Done, result is in ready.ntriples"
echo
echo "Composer URI comparison:"
echo "Added: $added"
echo "Removed: $removed"