#!/usr/bin/python
"""Export the rows of one catalogue from the ``media`` table as RDF/XML.

The catalogue name is read from the first command-line argument.  Every row
returned by the query is turned into its own graph and serialized to
``output/<catalogue>_<n>.rdf`` (``<catalogue>`` lower-cased, ``<n>`` counting
from 1).
"""

import sys
import psycopg2
import psycopg2.extras

from rdflib.Graph import ConjunctiveGraph as Graph
from rdflib import Namespace, Literal, URIRef, BNode, RDF

catalogue = sys.argv[1]

foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()

# Connection settings; placeholders to be filled in before running.
username = "USERNAME"
host = "HOST"
database = "DATABASE"

# Per-kind running counter and namespace cache used by getNewNode().
counters = {}
namespaces = {}


def loadCatalogue(catalogue):
    """Return all ``media`` rows whose catalogue matches *catalogue*.

    The value is compared with SQL ``LIKE``, so ``%`` and ``_`` in
    *catalogue* act as wildcards.  Rows are fetched through a ``DictCursor``
    so columns can be read by name.

    Raises:
        psycopg2.Error: if the connection or the query fails.  A connection
            failure is also reported on stderr before being re-raised.
    """
    dsn = "dbname='"+database+"' user='"+username+"' host='"+host+"'"
    try:
        conn = psycopg2.connect(dsn)
    except psycopg2.Error:
        # Previously execution continued and died with a NameError on `conn`;
        # surface the real error instead.
        sys.stderr.write("Unable to connect to the database\n")
        raise

    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            # Let the driver quote the value: it comes straight from argv.
            cursor.execute(
                "SELECT * from media WHERE catalogue LIKE %s", (catalogue,))
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()


def _getAgent(graph, artists, name):
    """Return the node for the agent called *name*, creating it on first use.

    Known names are looked up in *artists*.  For an unknown name a new
    'artist' node is minted, described in *graph* as a ``foaf:Agent`` with
    the stripped name, and remembered in *artists* under the unstripped name.
    """
    try:
        return artists[name]
    except KeyError:
        agent = getNewNode('artist')
        graph.add((agent, RDF.type, foaf['Agent']))
        graph.add((agent, foaf['name'], Literal(name.strip())))
        artists[name] = agent
        return agent


def createGraph(rows):
    """Write one RDF/XML file per row of *rows*.

    Each row is read by column name (``artist``, ``composer``, ``tracknum``,
    ``album``, ``track``, ``isrc``).  Album and artist nodes are shared
    between rows with the same name; their describing triples are emitted
    only in the file of the row that first mentions them.
    """
    albums = {}

    # Artists with a pre-assigned URI in the mb_artist namespace; anything
    # else gets a locally minted foaf:Agent node.
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis': mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}

    counter = 1
    for row in rows:
        graph = Graph(identifier=URIRef(default_graph_uri))

        # Create all the relevant nodes (with the correct IDs)
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        signal = getNewNode('signal')
        record = getNewNode('record')
        performance = getNewNode('performance')

        # Reset per row so a row without an artist can neither hit an
        # unbound name nor inherit the previous row's artist.
        artist = None

        # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            artist = _getAgent(graph, artists, row['artist'])

        # Without an explicit composer, fall back to the artist (if any).
        if row['composer']:
            composer = _getAgent(graph, artists, row['composer'])
        else:
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer is not None:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if artist is not None:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        # Album
        # NOTE(review): row['album'] is assumed to be a string here; a NULL
        # album would fail on .strip() -- confirm against the schema.
        try:
            album = albums[row['album']]
        except KeyError:
            album = getNewNode('album')
            graph.add((album, RDF.type, mo['Record']))
            graph.add((album, dc['title'], Literal(row['album'].strip())))
            graph.add((album, mo['release_type'], mo['album']))
            albums[row['album']] = album
        graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))

        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if artist is not None:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        graph.serialize(
            format='xml',
            destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
        counter += 1


def getNewNode(type):
    """Return the next unused node of kind *type*.

    Nodes live under ``<default_graph_uri>/<type>/`` and are numbered from 1
    independently for each kind; the module-level ``counters`` and
    ``namespaces`` dicts hold the state between calls.
    """
    # `type` shadows the builtin, but the parameter name is kept so existing
    # callers are unaffected.
    count = counters.setdefault(type, 1)

    if type not in namespaces:
        namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")

    node = namespaces[type][str(count)]
    counters[type] = count + 1
    return node


createGraph(loadCatalogue(catalogue))