view examples/browser/export/cat2rdf.py @ 648:4b79043f90ba

hack lshlib into happiness with mingw _locking() for file locking; hardcode pagesize as 64k; use lseek()/read()-write()/lseek() and buffers to emulate mmap(). I haven't actually tested all this lshlib functionality, but with this change I can build an audioDB.dll which works well enough to be linked into a binary which seems to work under Wine.
author mas01cr
date Tue, 13 Oct 2009 20:17:06 +0000
parents 901803e1305f
children 1a251dd217c6
line wrap: on
line source
#!/usr/bin/python

import sys
import psycopg2
import psycopg2.extras

from rdflib.Graph import ConjunctiveGraph as Graph
from rdflib import Namespace, Literal, URIRef, BNode, RDF

# Name of the catalogue to export, taken from the first command-line argument.
# It is used both in the SQL filter and (lower-cased) in URIs and file names.
catalogue = sys.argv[1]

# RDF vocabularies used when building the graphs.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
# Base URI: graph identifier and prefix of every node minted by getNewNode().
default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()

# Database connection settings.
# NOTE(review): these look like placeholders to be filled in before running - confirm.
username = "USERNAME"
host = "HOST"
database = "DATABASE"

# Per-type state shared by all getNewNode() calls:
# next sequence number to hand out, keyed by node type
counters = {}
# cached Namespace object, keyed by node type
namespaces = {}

def loadCatalogue(catalogue):
	"""Fetch every row of the `media` table whose catalogue matches `catalogue`.

	`catalogue` is used as the right-hand side of a SQL LIKE, so it may
	contain LIKE wildcards.  It is passed to the driver as a bound parameter
	rather than being spliced into the SQL text.

	Returns the list produced by `cursor.fetchall()` on a DictCursor, so
	columns can be read by name (e.g. row['artist']).

	Raises psycopg2.Error if the connection or the query fails; a connection
	failure is also reported on stderr before being re-raised.
	"""
	try:
		conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'")
	except psycopg2.Error:
		# Report and propagate: carrying on without a connection would only
		# fail a few lines later with an unrelated NameError.
		sys.stderr.write("Unable to connect to the database\n")
		raise

	try:
		cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
		# Let the driver quote the value instead of concatenating it.
		cursor.execute("SELECT * from media WHERE catalogue LIKE %s", (catalogue,))
		return cursor.fetchall()
	finally:
		# The rows are already in memory, so the connection can be released.
		conn.close()

def _getAgent(graph, artists, name):
	"""Return the node for the agent called `name`, minting it on first use.

	`artists` maps raw (unstripped) names to nodes and is updated in place.
	When `name` is not in it, a new 'artist' node is created and described in
	`graph` as a foaf:Agent whose foaf:name is the stripped name.
	"""
	try:
		return artists[name]
	except KeyError:
		agent = getNewNode('artist')
		graph.add((agent, RDF.type, foaf['Agent']))
		graph.add((agent, foaf['name'], Literal(name.strip())))
		artists[name] = agent
		return agent

def createGraph(rows):
	"""Write one RDF/XML file per catalogue row.

	For each row a fresh graph (identified by `default_graph_uri`) is built
	with work, composition, track, signal, record and performance nodes,
	plus artist, composer and album nodes when the row provides them.  The
	graph is serialized to "output/<catalogue>_<n>.rdf", n counting from 1.

	`rows` is an iterable of mappings read by the keys 'artist', 'composer',
	'tracknum', 'album', 'track' and 'isrc'; empty or None values are
	skipped.

	Artist and album nodes are shared across rows, so the triples describing
	a newly minted artist or album appear only in the graph of the first row
	that mentions it.

	Returns None.
	"""
	albums = {}

	# Artists with a known MusicBrainz URI; other names get a local node.
	artists = {
		'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
		'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
		'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}

	counter = 1
	for row in rows:
		graph = Graph(identifier = URIRef(default_graph_uri))

		# Create all the relevant nodes (with the correct IDs)

		work = getNewNode('work')
		composition = getNewNode('composition')
		track = getNewNode('track')
		signal = getNewNode('signal')
		record = getNewNode('record')
		performance = getNewNode('performance')

		# Reset per row so that a row without an artist neither fails with a
		# NameError nor inherits the artist of the previous row.
		artist = None
		composer = None

		# If we don't have an artist url, make a foaf Agent instead.
		if row['artist']:
			artist = _getAgent(graph, artists, row['artist'])

		if row['composer']:
			composer = _getAgent(graph, artists, row['composer'])
		else:
			# No explicit composer: fall back to the performing artist.
			composer = artist

		# Work
		graph.add((work, RDF.type, mo['MusicalWork']))

		# Composition
		graph.add((composition, RDF.type, mo['Composition']))
		if composer is not None:
			graph.add((composition, mo['composer'], composer))
		graph.add((composition, mo['produced_work'], work))

		# Track
		graph.add((track, RDF.type, mo['Track']))
		if artist is not None:
			graph.add((track, foaf['maker'], artist))
		if row['tracknum']:
			graph.add((track, mo['track_number'], Literal(row['tracknum'])))

		# Album (skipped when the column is NULL, which cannot be stripped)
		album_title = row['album']
		if album_title is not None:
			try:
				album = albums[album_title]
			except KeyError:
				album = getNewNode('album')
				graph.add((album, RDF.type, mo['Record']))
				graph.add((album, dc['title'], Literal(album_title.strip())))
				graph.add((album, mo['release_type'], mo['album']))
				albums[album_title] = album
			graph.add((album, mo['track'], track))

		# Signal
		graph.add((signal, RDF.type, mo['Signal']))
		graph.add((signal, mo['published_as'], record))

		if row['track']:
			graph.add((signal, dc['title'], Literal(row['track'].strip())))
		if row['isrc']:
			graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

		# Record
		graph.add((record, RDF.type, mo['Record']))
		graph.add((record, mo['publication_of'], signal))
		graph.add((record, mo['track'], track))

		# Performance
		graph.add((performance, RDF.type, mo['Performance']))
		graph.add((performance, mo['performance_of'], work))
		if artist is not None:
			graph.add((performance, mo['performer'], artist))
		graph.add((performance, mo['recorded_as'], signal))

		graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
		counter += 1
 
def getNewNode(type):
	"""Mint the next URI node of the given type.

	Nodes are named "<default_graph_uri>/<type>/<n>", where n starts at 1
	and increases by one on every call for that type.  The sequence numbers
	live in the module-level `counters` dict and the per-type Namespace
	objects are cached in `namespaces`.
	"""
	# First request for this type: start numbering at 1.
	if type not in counters:
		counters[type] = 1

	# Build the namespace for this type once and reuse it afterwards.
	if type not in namespaces:
		namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")

	index = counters[type]
	minted = namespaces[type][str(index)]
	counters[type] = index + 1
	return minted

# Script entry point, executed whenever this module is run or imported:
# load the rows for the catalogue named on the command line and write one
# RDF file per row.
createGraph(loadCatalogue(catalogue))