mas01mj@640
|
1 #!/usr/bin/python
|
mas01mj@640
|
2
|
mas01mj@640
|
3 import sys
|
mas01mj@640
|
4 import psycopg2
|
mas01mj@640
|
5 import psycopg2.extras
|
mas01mj@640
|
6
|
mas01mj@640
|
7 from rdflib.Graph import ConjunctiveGraph as Graph
|
mas01mj@640
|
8 from rdflib import Namespace, Literal, URIRef, BNode, RDF
|
mas01mj@640
|
9
|
mas01mj@652
|
# Catalogue identifier, taken from the first command-line argument.
# Fail with a usage message instead of an IndexError traceback when absent.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s CATALOGUE_ID\n" % sys.argv[0])
    sys.exit(1)
catalogueID = sys.argv[1]

# RDF vocabularies used when building the graphs.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
doap = Namespace("http://usefulinc.com/ns/doap#")

# Base URI of this catalogue's graph; minted node and signal URIs are built
# beneath it.
graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()

# PostgreSQL connection settings.
# NOTE(review): these look like placeholders to be filled in before running.
username = "USERNAME"
host = "HOSTNAME"
database = "DATABASE"
|
mas01mj@640
|
23
|
mas01mj@652
|
24
|
mas01mj@652
|
# Open the PostgreSQL connection. Everything below needs it, so a failure is
# fatal: report the cause and stop rather than continuing without `conn`.
try:
    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'")
except psycopg2.Error:
    # sys.exc_info() keeps this compatible with interpreters that predate
    # the "except ... as ..." syntax.
    sys.stderr.write("Unable to connect to the database: %s\n" % sys.exc_info()[1])
    sys.exit(1)

# DictCursor rows can be indexed by column name (row['feature'], ...).
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Module-wide registries shared by the graph-building functions.
counters = {}    # node type -> next numeric id handed out by getNewNode
namespaces = {}  # node type -> Namespace under graph_uri for that type
extractors = {}  # "software_version_platform" -> doap:Version node
projects = {}    # software name -> doap:Project node
databases = {}   # catalogue id -> list of audiodb:Database nodes
|
mas01mj@640
|
37
|
mas01mj@652
|
def loadFeatures(catalogueID):
    """Return all rows of the ``features`` table for the given catalogue.

    catalogueID is used as the right-hand side of a SQL ``LIKE``. It is
    passed as a bound parameter, not spliced into the statement text, so
    quotes in the value cannot alter the query.
    """
    cursor.execute(
        "SELECT * from features WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()
|
mas01mj@640
|
42
|
mas01mj@652
|
def loadCatalogue(catalogueID):
    """Return all rows of the ``media`` table for the given catalogue.

    catalogueID is used as the right-hand side of a SQL ``LIKE``. It is
    passed as a bound parameter, not spliced into the statement text, so
    quotes in the value cannot alter the query.
    """
    cursor.execute(
        "SELECT * from media WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()
|
mas01mj@652
|
47
|
mas01mj@652
|
def createFeatureGraphs(rows):
    """Build one RDF graph describing every feature row and write it out.

    For each row a database, feature, segmentation and window node are
    minted and linked, the feature's parameters are attached as literals,
    and the extractor version and project nodes are linked in. The database
    nodes are recorded in ``databases[catalogueID]``. The graph is
    serialized as RDF/XML to ``output/<catalogue>/features.rdf``.

    rows -- feature rows indexable by column name.
    """
    # Column value -> audiodb class name. Unrecognised values add no
    # rdf:type triple.
    featureTypes = {
        "cqt": 'CQTFeature',
        "chr": 'ChromagramFeature',
        "mfcc": 'MFCCFeature',
    }
    segmentationTypes = {
        "frames": 'FrameSegmentation',
        "beats": 'BeatSegmentation',
        "segs": 'StructuralSegmentation',
    }
    windowTypes = {
        "hamming": 'HammingWindow',
    }

    # (audiodb predicate, row column) pairs copied verbatim as literals.
    literalProperties = (
        ("dimension", 'dim'),
        ("hop-size", 'hopsize'),
        ("window-length", 'winlen'),
        ("nfft", 'nfft'),
        ("segn", 'segn'),
        ("channel", 'channel'),
        ("loedge", 'loedge'),
        ("hiedge", 'hiedge'),
        ("octaveres", 'octaveres'),
    )

    graph = Graph(identifier = URIRef(graph_uri))
    databases[catalogueID] = []

    for row in rows:

        # Create all the relevant nodes (with the correct IDs).
        # The minting order fixes the numeric ids, so keep it stable.
        dbNode = getNewNode('database')
        databases[catalogueID].append(dbNode)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')

        typed = (
            (feature, featureTypes, 'feature'),
            (segmentation, segmentationTypes, 'segtype'),
            (window, windowTypes, 'windowtype'),
        )
        for node, classNames, column in typed:
            className = classNames.get(row[column])
            if className is not None:
                graph.add((node, RDF.type, audiodb[className]))

        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))

        for predicate, column in literalProperties:
            graph.add((feature, audiodb[predicate], Literal(row[column])))

        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])

        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))

        graph.add((dbNode, RDF.type, audiodb["Database"]))
        graph.add((dbNode, audiodb["feature"], feature))
        graph.add((dbNode, audiodb["extractor"], version))

    # One graph and one fixed destination for all rows, so it is written once.
    graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/"+"features.rdf")
|
mas01mj@652
|
105
|
mas01mj@652
|
def buildNewExtractor(graph, software, version, platform):
    """Return the doap:Version node for a software/version/platform triple.

    The first request for a combination mints an 'extractor' node, adds its
    type, version, name and os triples to *graph*, and caches it in
    ``extractors``. Later requests return the cached node and add nothing.
    """
    cacheKey = "_".join((software, version, platform))
    if cacheKey in extractors:
        return extractors[cacheKey]

    node = getNewNode('extractor')
    graph.add((node, RDF.type, doap["Version"]))
    graph.add((node, doap['version'], Literal(version)))
    graph.add((node, doap['name'], Literal(software)))
    graph.add((node, doap['os'], Literal(platform)))
    extractors[cacheKey] = node
    return node
|
mas01mj@652
|
118
|
mas01mj@652
|
def buildNewProject(graph, software):
    """Return the doap:Project node for *software*.

    The first request for a software name mints a 'project' node, adds its
    type and name triples to *graph*, and caches it in ``projects``. Later
    requests return the cached node and add nothing.
    """
    if software in projects:
        return projects[software]

    node = getNewNode('project')
    graph.add((node, RDF.type, doap["Project"]))
    graph.add((node, doap['name'], Literal(software)))
    projects[software] = node
    return node
|
mas01mj@652
|
129
|
mas01mj@652
|
def _lookupAgent(graph, agents, name):
    """Return the node cached for *name*, minting a foaf:Agent if unseen."""
    try:
        return agents[name]
    except KeyError:
        agent = getNewNode('artist')
        graph.add((agent, RDF.type, foaf['Agent']))
        graph.add((agent, foaf['name'], Literal(name.strip())))
        agents[name] = agent
        return agent


def createMediaGraphs(rows):
    """Write one RDF/XML file per media row describing the recording.

    Each row gets its own graph with work, composition, track, record,
    performance and signal nodes. Artist, composer and album nodes are
    shared across rows through local caches. Each signal is attached to
    every node in ``databases[catalogueID]``. Files are written to
    ``output/<catalogue>/media_<n>.rdf`` with n counting from 1.

    rows -- media rows indexable by column name.
    """
    albums = {}

    # Artists with a known MusicBrainz URI; anyone else becomes a foaf:Agent.
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}

    counter = 1
    for row in rows:
        graph = Graph(identifier = URIRef(graph_uri))

        # Create all the relevant nodes (with the correct IDs).
        # The minting order fixes the numeric ids, so keep it stable.
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        record = getNewNode('record')
        performance = getNewNode('performance')
        signal = Namespace(graph_uri+"/"+row['uid'])

        # Reset per row so a row without an artist cannot raise NameError
        # or inherit the artist left over from the previous row.
        artist = None
        if row['artist']:
            artist = _lookupAgent(graph, artists, row['artist'])

        # With no composer recorded, fall back to the artist (may be None).
        if row['composer']:
            composer = _lookupAgent(graph, artists, row['composer'])
        else:
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer is not None:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if artist is not None:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        # Album
        if row['album']:
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))

        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Add to the various databases
        for db in databases[catalogueID]:
            graph.add((db, audiodb["has-signal"], signal))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if artist is not None:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        # Write the file while the graph is still open, then release it.
        graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
        graph.close()
        counter += 1
|
mas01mj@640
|
229
|
mas01mj@640
|
def getNewNode(type):
    """Mint the next URI for a node of the given type.

    URIs have the form ``<graph_uri>/<type>/<n>``, with n starting at 1 and
    increasing by one on every call for that type. The per-type counter and
    Namespace are kept in the module-level ``counters`` and ``namespaces``.
    """
    # First use of a type starts its numbering at 1.
    index = counters.setdefault(type, 1)

    if type not in namespaces:
        namespaces[type] = Namespace(graph_uri+"/"+type+"/")

    counters[type] = index + 1
    return namespaces[type][str(index)]
|
mas01mj@640
|
246
|
mas01mj@652
|
# Script driver: fetch both result sets, then emit the feature graph followed
# by the per-track media graphs. createMediaGraphs reads the database nodes
# that createFeatureGraphs registers, so the order matters.
try:
    features = loadFeatures(catalogueID)
    catalogue = loadCatalogue(catalogueID)

    createFeatureGraphs(features)
    createMediaGraphs(catalogue)
finally:
    # Release the database resources even if graph generation fails.
    cursor.close()
    conn.close()
|