comparison examples/browser/export/cat2rdf.py @ 652:1a251dd217c6

Ontology fine-tuned and included in exporter.
author mas01mj
date Thu, 15 Oct 2009 14:24:36 +0000
parents 901803e1305f
children
comparison
equal deleted inserted replaced
651:5b2c0d9bc63d 652:1a251dd217c6
5 import psycopg2.extras 5 import psycopg2.extras
6 6
7 from rdflib.Graph import ConjunctiveGraph as Graph 7 from rdflib.Graph import ConjunctiveGraph as Graph
8 from rdflib import Namespace, Literal, URIRef, BNode, RDF 8 from rdflib import Namespace, Literal, URIRef, BNode, RDF
9 9
# Catalogue to export, taken from the first command-line argument.
catalogueID = sys.argv[1]

# RDF vocabularies used when building the graphs.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
# Base URI for this catalogue: used as the graph identifier and as the prefix
# of every node namespace handed out by getNewNode().
graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
doap = Namespace("http://usefulinc.com/ns/doap#")

# Connection settings (placeholders; fill in before running).
username = "USERNAME"
host = "HOSTNAME"
database = "DATABASE"


try:
    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'")
except psycopg2.Error:
    # Without a connection nothing below can work. Stop here instead of
    # falling through to a NameError on the unbound `conn`.
    sys.stdout.write("Unable to connect to the database\n")
    sys.exit(1)

# DictCursor lets the graph builders address columns by name (row['feature']).
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Module-level caches shared by the functions below.
counters = {}    # node type -> next numeric id (getNewNode)
namespaces = {}  # node type -> Namespace (getNewNode)
extractors = {}  # "software_version_platform" -> node (buildNewExtractor)
projects = {}    # software name -> node (buildNewProject)
databases = {}   # catalogue id -> list of database nodes (createFeatureGraphs)

def loadFeatures(catalogueID):
    """Return all rows of the ``features`` table for one catalogue.

    catalogueID is matched against the ``catalogue`` column with SQL LIKE.
    It is passed as a bound parameter rather than spliced into the SQL text,
    so quotes in the value cannot break or alter the statement.

    Returns whatever the module-level ``cursor`` yields from fetchall().
    """
    cursor.execute("SELECT * from features WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()

38 42
def loadCatalogue(catalogueID):
    """Return all rows of the ``media`` table for one catalogue.

    catalogueID is matched against the ``catalogue`` column with SQL LIKE.
    It is passed as a bound parameter rather than spliced into the SQL text,
    so quotes in the value cannot break or alter the statement.

    Returns whatever the module-level ``cursor`` yields from fetchall().
    """
    cursor.execute("SELECT * from media WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()
47
# Value of the row's 'feature' column -> audiodb ontology class name.
_FEATURE_CLASSES = {
    "cqt": "CQTFeature",
    "chr": "ChromagramFeature",
    "mfcc": "MFCCFeature",
}

# Value of the row's 'segtype' column -> audiodb ontology class name.
_SEGMENTATION_CLASSES = {
    "frames": "FrameSegmentation",
    "beats": "BeatSegmentation",
    "segs": "StructuralSegmentation",
}

# Value of the row's 'windowtype' column -> audiodb ontology class name.
_WINDOW_CLASSES = {
    "hamming": "HammingWindow",
}

# (audiodb property name, row column) pairs copied onto the feature as literals.
_FEATURE_LITERALS = (
    ("dimension", "dim"),
    ("hop-size", "hopsize"),
    ("window-length", "winlen"),
    ("nfft", "nfft"),
    ("segn", "segn"),
    ("channel", "channel"),
    ("loedge", "loedge"),
    ("hiedge", "hiedge"),
    ("octaveres", "octaveres"),
)


def createFeatureGraphs(rows):
    """Describe every feature row in one RDF graph and write it to disk.

    For each row a database, feature, segmentation and window node is
    created. The feature is typed and annotated from the row's columns and
    linked to its extractor version and project.

    Side effects:
      * databases[catalogueID] is reset and receives one database node per row.
      * The graph is serialised as RDF/XML to
        output/<catalogue id, lower-cased>/features.rdf.

    rows: iterable of mappings addressable by column name.
    """
    graph = Graph(identifier=URIRef(graph_uri))
    databases[catalogueID] = []

    for row in rows:
        # Create all the relevant nodes (with the correct IDs)
        database_node = getNewNode('database')
        databases[catalogueID].append(database_node)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')

        # Type each node from its column; unrecognised values leave the node
        # untyped.
        for node, classes, column in (
                (feature, _FEATURE_CLASSES, 'feature'),
                (segmentation, _SEGMENTATION_CLASSES, 'segtype'),
                (window, _WINDOW_CLASSES, 'windowtype')):
            class_name = classes.get(row[column])
            if class_name is not None:
                graph.add((node, RDF.type, audiodb[class_name]))

        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))

        for prop, column in _FEATURE_LITERALS:
            graph.add((feature, audiodb[prop], Literal(row[column])))

        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])

        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))

        graph.add((database_node, RDF.type, audiodb["Database"]))
        graph.add((database_node, audiodb["feature"], feature))
        graph.add((database_node, audiodb["extractor"], version))

    # NOTE(review): indentation was not recoverable from the original text;
    # serialising once, after all rows are added, is assumed - confirm.
    graph.serialize(format='xml', destination="output/"+catalogueID.lower()+"/"+"features.rdf")

105
def buildNewExtractor(graph, software, version, platform):
    """Return the doap:Version node for a software/version/platform triple.

    Nodes are cached in the module-level ``extractors`` dict under the key
    "<software>_<version>_<platform>". On a cache miss a new 'extractor'
    node is created, described in ``graph`` (type, version, name, os) and
    stored before being returned.
    """
    cache_key = software+"_"+version+"_"+platform
    if cache_key in extractors:
        return extractors[cache_key]

    # First time this combination is seen: mint and describe a node.
    node = getNewNode('extractor')
    graph.add((node, RDF.type, doap["Version"]))
    graph.add((node, doap['version'], Literal(version)))
    graph.add((node, doap['name'], Literal(software)))
    graph.add((node, doap['os'], Literal(platform)))
    extractors[cache_key] = node
    return node

118
def buildNewProject(graph, software):
    """Return the doap:Project node for a piece of software.

    Nodes are cached in the module-level ``projects`` dict, keyed by the
    software name. On a cache miss a new 'project' node is created,
    described in ``graph`` (type and name) and stored before being returned.
    """
    if software in projects:
        return projects[software]

    # First time this software is seen: mint and describe a node.
    node = getNewNode('project')
    graph.add((node, RDF.type, doap["Project"]))
    graph.add((node, doap['name'], Literal(software)))
    projects[software] = node
    return node

129
130 def createMediaGraphs(rows):
40 albums = {} 131 albums = {}
41 132
42 artists = { 133 artists = {
43 'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'], 134 'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
44 'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'], 135 'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
45 'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']} 136 'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}
46 137
47 counter = 1 138 counter = 1
48 for row in rows: 139 for row in rows:
49 graph = Graph(identifier = URIRef(default_graph_uri)) 140 graph = Graph(identifier = URIRef(graph_uri))
50
51 # Create all the relevant nodes (with the correct IDs) 141 # Create all the relevant nodes (with the correct IDs)
52 142
53 work = getNewNode('work') 143 work = getNewNode('work')
54 composition = getNewNode('composition') 144 composition = getNewNode('composition')
55 track = getNewNode('track') 145 track = getNewNode('track')
56 signal = getNewNode('signal')
57 record = getNewNode('record') 146 record = getNewNode('record')
58 performance = getNewNode('performance') 147 performance = getNewNode('performance')
148 signal = Namespace(graph_uri+"/"+row['uid'])
59 149
60 # If we don't have an artist url, make a foaf Agent instead. 150 # If we don't have an artist url, make a foaf Agent instead.
61 if row['artist']: 151 if row['artist']:
62 try: 152 try:
63 artist = artists[row['artist']] 153 artist = artists[row['artist']]
93 if row['artist']: 183 if row['artist']:
94 graph.add((track, foaf['maker'], artist)) 184 graph.add((track, foaf['maker'], artist))
95 if row['tracknum']: 185 if row['tracknum']:
96 graph.add((track, mo['track_number'], Literal(row['tracknum']))) 186 graph.add((track, mo['track_number'], Literal(row['tracknum'])))
97 187
98 # Album 188 if row['album']:
99 try: 189 # Album
100 album = albums[row['album']] 190 try:
101 except KeyError: 191 album = albums[row['album']]
102 album = getNewNode('album') 192 except KeyError:
103 graph.add((album, RDF.type, mo['Record'])) 193 album = getNewNode('album')
104 graph.add((album, dc['title'], Literal(row['album'].strip()))) 194 graph.add((album, RDF.type, mo['Record']))
105 graph.add((album, mo['release_type'], mo['album'])) 195 graph.add((album, dc['title'], Literal(row['album'].strip())))
106 albums[row['album']] = album 196 graph.add((album, mo['release_type'], mo['album']))
107 graph.add((album, mo['track'], track)) 197 albums[row['album']] = album
198 graph.add((album, mo['track'], track))
108 199
109 # Signal 200 # Signal
110 graph.add((signal, RDF.type, mo['Signal'])) 201 graph.add((signal, RDF.type, mo['Signal']))
111 graph.add((signal, mo['published_as'], record)) 202 graph.add((signal, mo['published_as'], record))
112 203
113 if row['track']: 204 if row['track']:
114 graph.add((signal, dc['title'], Literal(row['track'].strip()))) 205 graph.add((signal, dc['title'], Literal(row['track'].strip())))
115 if row['isrc']: 206 if row['isrc']:
116 graph.add((signal, mo['isrc'], Literal(row['isrc'].strip()))) 207 graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
208
209 # Add to the various databases
210 dbs = databases[catalogueID]
211 for db in dbs:
212 graph.add((db, audiodb["has-signal"], signal))
117 213
118 # Record 214 # Record
119 graph.add((record, RDF.type, mo['Record'])) 215 graph.add((record, RDF.type, mo['Record']))
120 graph.add((record, mo['publication_of'], signal)) 216 graph.add((record, mo['publication_of'], signal))
121 graph.add((record, mo['track'], track)) 217 graph.add((record, mo['track'], track))
124 graph.add((performance, RDF.type, mo['Performance'])) 220 graph.add((performance, RDF.type, mo['Performance']))
125 graph.add((performance, mo['performance_of'], work)) 221 graph.add((performance, mo['performance_of'], work))
126 if row['artist']: 222 if row['artist']:
127 graph.add((performance, mo['performer'], artist)) 223 graph.add((performance, mo['performer'], artist))
128 graph.add((performance, mo['recorded_as'], signal)) 224 graph.add((performance, mo['recorded_as'], signal))
129 225
130 #graph.close() 226 graph.close()
131 227 graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
132 graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
133 counter += 1 228 counter += 1
134 229
135 def getNewNode(type): 230 def getNewNode(type):
136 try: 231 try:
137 count = counters[type] 232 count = counters[type]
140 count = counters[type] 235 count = counters[type]
141 236
142 try: 237 try:
143 namespace = namespaces[type] 238 namespace = namespaces[type]
144 except KeyError: 239 except KeyError:
145 namespaces[type] = Namespace(default_graph_uri+"/"+type+"/") 240 namespaces[type] = Namespace(graph_uri+"/"+type+"/")
146 namespace = namespaces[type] 241 namespace = namespaces[type]
147 242
148 node = namespace[str(count)] 243 node = namespace[str(count)]
149 counters[type] += 1 244 counters[type] += 1
150 return node 245 return node
151 246
# Script driver: fetch the feature and media rows for the requested catalogue.
152 createGraph(loadCatalogue(catalogue)) 247 features = loadFeatures(catalogueID)
248 catalogue = loadCatalogue(catalogueID)
249
# Order matters: createFeatureGraphs fills databases[catalogueID], which
# createMediaGraphs reads when attaching each signal to its databases.
250 createFeatureGraphs(features)
251 createMediaGraphs(catalogue)