diff examples/browser/export/cat2rdf.py @ 640:901803e1305f

First instance of audioDB browser code.
author mas01mj
date Thu, 08 Oct 2009 11:19:11 +0000
parents
children 1a251dd217c6
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/examples/browser/export/cat2rdf.py	Thu Oct 08 11:19:11 2009 +0000
@@ -0,0 +1,152 @@
+#!/usr/bin/python
+
+import sys
+import psycopg2
+import psycopg2.extras
+
+from rdflib.Graph import ConjunctiveGraph as Graph
+from rdflib import Namespace, Literal, URIRef, BNode, RDF
+# Catalogue name from the first command-line argument; passed to loadCatalogue and used in the output file names.
+catalogue = sys.argv[1]
+# RDF vocabularies used when building triples; mb_artist is the base of the hard-coded artist URIs in createGraph.
+foaf = Namespace("http://xmlns.com/foaf/0.1/")
+mo = Namespace("http://purl.org/ontology/mo/")
+mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
+dc = Namespace("http://purl.org/dc/elements/1.1/")
+default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()  # graph identifier and prefix of every minted node URI
+# Database connection settings read by loadCatalogue. NOTE(review): the literal values look like placeholders to fill in.
+username = "USERNAME"
+host = "HOST"
+database = "DATABASE"
+# State shared by getNewNode calls, keyed by node type name: next sequence number and cached Namespace.
+counters = {}
+namespaces = {}
+
+def loadCatalogue(catalogue):
+	try:
+		conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
+	except:
+		print "Unable to connect to the database"
+
+	cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
+
+	cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogue+"""'""")
+
+	rows = cursor.fetchall()
+
+	return rows
+
+def createGraph(rows):
+	albums = {}
+
+	artists = {
+		'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'], 
+		'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'], 
+		'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}
+
+	counter = 1
+	for row in rows:
+		graph = Graph(identifier = URIRef(default_graph_uri))
+		
+		# Create all the relevant nodes (with the correct IDs)
+
+		work = getNewNode('work')
+		composition = getNewNode('composition')
+		track = getNewNode('track')
+		signal = getNewNode('signal')
+		record = getNewNode('record')
+		performance = getNewNode('performance')
+
+		# If we don't have an artist url, make a foaf Agent instead.
+		if row['artist']:
+			try:
+				artist = artists[row['artist']]
+			except KeyError:
+				artist = getNewNode('artist')
+				graph.add((artist, RDF.type, foaf['Agent']))
+				graph.add((artist, foaf['name'], Literal(row['artist'].strip())))
+				artists[row['artist']] = artist;	
+
+		if row['composer']:
+			try:
+				composer = artists[row['composer']]
+			except KeyError:
+				composer = getNewNode('artist')
+				graph.add((composer, RDF.type, foaf['Agent']))
+				graph.add((composer, foaf['name'], Literal(row['composer'].strip())))
+				artists[row['composer']] = composer;	
+		else:
+			composer = artist
+
+
+		# Work
+		graph.add((work, RDF.type, mo['MusicalWork']))
+		
+		# Composition
+		graph.add((composition, RDF.type, mo['Composition']))
+		if composer:
+			graph.add((composition, mo['composer'], composer)) 
+		graph.add((composition, mo['produced_work'], work))
+
+		# Track
+		graph.add((track, RDF.type, mo['Track']))
+		if row['artist']:
+			graph.add((track, foaf['maker'], artist))
+		if row['tracknum']:
+			graph.add((track, mo['track_number'], Literal(row['tracknum'])))
+
+		# Album
+		try:
+			album = albums[row['album']]
+		except KeyError:
+			album = getNewNode('album')
+			graph.add((album, RDF.type, mo['Record']))
+			graph.add((album, dc['title'], Literal(row['album'].strip())))
+			graph.add((album, mo['release_type'], mo['album']))
+			albums[row['album']] = album
+		graph.add((album, mo['track'], track))
+
+		# Signal
+		graph.add((signal, RDF.type, mo['Signal']))
+		graph.add((signal, mo['published_as'], record))
+		
+		if row['track']:
+			graph.add((signal, dc['title'], Literal(row['track'].strip())))
+		if row['isrc']:
+			graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
+
+		# Record
+		graph.add((record, RDF.type, mo['Record']))
+		graph.add((record, mo['publication_of'], signal))
+		graph.add((record, mo['track'], track))
+
+		# Performance
+		graph.add((performance, RDF.type, mo['Performance']))
+		graph.add((performance, mo['performance_of'], work))
+		if row['artist']:
+			graph.add((performance, mo['performer'], artist))
+		graph.add((performance, mo['recorded_as'], signal))
+
+		#graph.close()
+		
+		graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
+		counter += 1
+ 
+def getNewNode(type):
+	try:
+		count = counters[type]
+	except KeyError:
+		counters[type] = 1
+		count = counters[type]
+
+	try:
+		namespace = namespaces[type]
+	except KeyError:
+		namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")
+		namespace = namespaces[type]
+
+	node = namespace[str(count)]
+	counters[type] += 1
+	return node
+# Script body: load the rows of the catalogue named on the command line and write one RDF file per row.
+createGraph(loadCatalogue(catalogue))