mas01mj@640
|
1 #!/usr/bin/python
|
mas01mj@640
|
2
|
mas01mj@640
|
3 import sys
|
mas01mj@640
|
4 import psycopg2
|
mas01mj@640
|
5 import psycopg2.extras
|
mas01mj@640
|
6
|
mas01mj@640
|
7 from rdflib.Graph import ConjunctiveGraph as Graph
|
mas01mj@640
|
8 from rdflib import Namespace, Literal, URIRef, BNode, RDF
|
mas01mj@640
|
9
|
mas01mj@640
|
# Name of the catalogue to export, taken from the first command-line argument.
# Fail with a usage message rather than a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("Usage: %s <catalogue>" % sys.argv[0])
catalogue = sys.argv[1]

# RDF vocabularies used when describing each track.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")

# Base URI of the exported graph; also the prefix of every node minted by
# getNewNode().
default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()

# PostgreSQL connection settings read by loadCatalogue().
# NOTE(review): these look like placeholders to be filled in before running.
username = "USERNAME"
host = "HOST"
database = "DATABASE"

# Per-node-type state shared across getNewNode() calls:
#   counters   -- next numeric id to hand out, keyed by node type
#   namespaces -- cached rdflib Namespace, keyed by node type
counters = {}
namespaces = {}
|
mas01mj@640
|
24
|
mas01mj@640
|
def loadCatalogue(catalogue):
    """Fetch every row of the ``media`` table belonging to a catalogue.

    Connects to PostgreSQL using the module-level ``database``, ``username``
    and ``host`` settings, runs the query and closes the connection again.

    Args:
        catalogue: Value matched against the ``catalogue`` column with SQL
            ``LIKE``, so ``%`` and ``_`` in it act as wildcards.

    Returns:
        The list produced by ``fetchall()`` on a ``DictCursor``; columns of
        each row can be read by name.

    Raises:
        psycopg2.Error: If the connection cannot be opened or the query
            fails.
    """
    dsn = "dbname='" + database + "' user='" + username + "' host='" + host + "'"
    try:
        conn = psycopg2.connect(dsn)
    except psycopg2.Error:
        # Report and re-raise: carrying on without a connection would only
        # fail a few lines later with an unrelated NameError on ``conn``.
        sys.stderr.write("Unable to connect to the database\n")
        raise

    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        try:
            # Let the driver quote the value instead of splicing the
            # command-line argument into the SQL text.
            cursor.execute(
                "SELECT * from media WHERE catalogue LIKE %s", (catalogue,))
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()
|
mas01mj@640
|
38
|
mas01mj@640
|
def _getAgentNode(graph, agents, name):
    """Return the node for the agent called *name*, minting one if unknown.

    A newly minted node is described in *graph* as a ``foaf:Agent`` with a
    ``foaf:name`` (the stripped *name*) and remembered in *agents* under the
    unstripped *name* so later rows reuse it.
    """
    try:
        return agents[name]
    except KeyError:
        node = getNewNode('artist')
        graph.add((node, RDF.type, foaf['Agent']))
        graph.add((node, foaf['name'], Literal(name.strip())))
        agents[name] = node
        return node


def createGraph(rows):
    """Write one RDF/XML file per media row describing the track.

    For every row a fresh graph is built containing a work, composition,
    track, signal, record and performance node, linked to artist, composer
    and album nodes where the row provides them. The graph is serialized to
    ``output/<catalogue>_<n>.rdf`` where ``n`` counts rows from 1.

    Artist and album nodes are shared across rows; because each row gets its
    own graph, their type/name/title triples only appear in the file of the
    first row that mentions them.

    Args:
        rows: Iterable of mappings with the keys ``artist``, ``composer``,
            ``tracknum``, ``album``, ``track`` and ``isrc``.

    Returns:
        None. The result is the set of files written under ``output/``,
        which must already exist.
    """
    albums = {}

    # Artists with a known MusicBrainz URI; anything else gets a local
    # foaf:Agent node the first time it is seen.
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis': mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a'],
    }

    counter = 1
    for row in rows:
        graph = Graph(identifier=URIRef(default_graph_uri))

        # Create all the relevant nodes (with the correct IDs)
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        signal = getNewNode('signal')
        record = getNewNode('record')
        performance = getNewNode('performance')

        # Reset per row so a row without an artist can neither hit an
        # unbound name nor inherit the previous row's artist.
        artist = None
        if row['artist']:
            artist = _getAgentNode(graph, artists, row['artist'])

        # With no explicit composer, the artist (if any) is used instead.
        if row['composer']:
            composer = _getAgentNode(graph, artists, row['composer'])
        else:
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer is not None:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if artist is not None:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        # Album (skipped when the column is NULL, which would otherwise
        # crash on .strip())
        if row['album'] is not None:
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))

        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if artist is not None:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        graph.serialize(format='xml', destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
        counter += 1
|
mas01mj@640
|
134
|
mas01mj@640
|
def getNewNode(type):
    """Mint the next sequentially numbered node URI for a node type.

    Nodes live under ``<default_graph_uri>/<type>/`` and are numbered from 1
    independently for each type. The numbering and the per-type namespace
    are kept in the module-level ``counters`` and ``namespaces`` dicts.

    Args:
        type: Node type name, e.g. ``'track'`` or ``'artist'``.

    Returns:
        The node obtained by indexing the type's namespace with the number.
    """
    # First request for a type starts its numbering at 1.
    index = counters.setdefault(type, 1)

    # Build the namespace for this type once and reuse it afterwards.
    if type not in namespaces:
        namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")

    new_node = namespaces[type][str(index)]
    counters[type] = index + 1
    return new_node
|
mas01mj@640
|
151
|
mas01mj@640
|
if __name__ == "__main__":
    # Run the export only when executed as a script, so importing this
    # module does not query the database or write files.
    createGraph(loadCatalogue(catalogue))
|