audiodb: comparison of examples/browser/export/cat2rdf.py @ 652:1a251dd217c6
Ontology fine-tuned and included in exporter.
author   | mas01mj
date     | Thu, 15 Oct 2009 14:24:36 +0000
parents  | 901803e1305f
children |
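For orientation (this note is not part of the changeset): the new createFeatureGraphs path added below describes each audioDB database with terms from the audiodb and doap vocabularies. The following sketch shows the shape of graph it builds for one entirely hypothetical features row, written against the same rdflib 2.x / Python 2 API the script imports; the node URIs, feature values and extractor name are illustrative only.

    from rdflib.Graph import ConjunctiveGraph as Graph  # rdflib 2.x API, as used by cat2rdf.py
    from rdflib import Namespace, Literal, URIRef, RDF

    audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
    doap = Namespace("http://usefulinc.com/ns/doap#")
    base = "http://omras2.gold.ac.uk/catalogue/example"  # illustrative catalogue URI

    graph = Graph(identifier=URIRef(base))

    # Hypothetical row: a 12-bin chromagram, beat-segmented, produced by "someExtractor 0.9"
    database = URIRef(base + "/database/1")
    feature = URIRef(base + "/feature/1")
    segmentation = URIRef(base + "/segmentation/1")
    extractor = URIRef(base + "/extractor/1")

    graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
    graph.add((feature, audiodb['dimension'], Literal(12)))
    graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
    graph.add((feature, audiodb['segmentation'], segmentation))

    graph.add((extractor, RDF.type, doap['Version']))
    graph.add((extractor, doap['name'], Literal("someExtractor")))
    graph.add((extractor, doap['version'], Literal("0.9")))

    graph.add((database, RDF.type, audiodb['Database']))
    graph.add((database, audiodb['feature'], feature))
    graph.add((database, audiodb['extractor'], extractor))

    print graph.serialize(format='xml')  # Python 2 print statement, matching the script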
--- examples/browser/export/cat2rdf.py (651:5b2c0d9bc63d)
+++ examples/browser/export/cat2rdf.py (652:1a251dd217c6)
@@ -5,59 +5,149 @@
 import psycopg2.extras
 
 from rdflib.Graph import ConjunctiveGraph as Graph
 from rdflib import Namespace, Literal, URIRef, BNode, RDF
 
-catalogue = sys.argv[1]
+catalogueID = sys.argv[1]
 
 foaf = Namespace("http://xmlns.com/foaf/0.1/")
 mo = Namespace("http://purl.org/ontology/mo/")
 mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
 dc = Namespace("http://purl.org/dc/elements/1.1/")
-default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()
+graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
+audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
+doap = Namespace("http://usefulinc.com/ns/doap#")
 
 username = "USERNAME"
-host = "HOST"
+host = "HOSTNAME"
 database = "DATABASE"
+
+
+try:
+    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
+except:
+    print "Unable to connect to the database"
+
+cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
 counters = {}
 namespaces = {}
-
-def loadCatalogue(catalogue):
-    try:
-        conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
-    except:
-        print "Unable to connect to the database"
-
-    cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
-
-    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogue+"""'""")
-
+extractors = {}
+projects = {}
+databases = {}
+
+def loadFeatures(catalogueID):
+    cursor.execute("""SELECT * from features WHERE catalogue LIKE '"""+catalogueID+"""'""")
     rows = cursor.fetchall()
-
     return rows
 
-def createGraph(rows):
+def loadCatalogue(catalogueID):
+    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogueID+"""'""")
+    rows = cursor.fetchall()
+    return rows
+
+def createFeatureGraphs(rows):
+    albums = {}
+
+    graph = Graph(identifier = URIRef(graph_uri))
+    counter = 1
+    databases[catalogueID] = []
+
+    for row in rows:
+
+        # Create all the relevant nodes (with the correct IDs)
+        database = getNewNode('database')
+        databases[catalogueID].append(database)
+        feature = getNewNode('feature')
+        segmentation = getNewNode('segmentation')
+        window = getNewNode('window')
+
+        if row['feature'] == "cqt":
+            graph.add((feature, RDF.type, audiodb['CQTFeature']))
+        elif row['feature'] == "chr":
+            graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
+        elif row['feature'] == "mfcc":
+            graph.add((feature, RDF.type, audiodb['MFCCFeature']))
+
+        if row['segtype'] == "frames":
+            graph.add((segmentation, RDF.type, audiodb['FrameSegmentation']))
+        elif row['segtype'] == "beats":
+            graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
+        elif row['segtype'] == "segs":
+            graph.add((segmentation, RDF.type, audiodb['StructuralSegmentation']))
+
+        if row['windowtype'] == "hamming":
+            graph.add((window, RDF.type, audiodb['HammingWindow']))
+
+        graph.add((feature, audiodb["window"], window))
+        graph.add((feature, audiodb["segmentation"], segmentation))
+
+        graph.add((feature, audiodb["dimension"], Literal(row['dim'])))
+        graph.add((feature, audiodb["hop-size"], Literal(row['hopsize'])))
+        graph.add((feature, audiodb["window-length"], Literal(row['winlen'])))
+        graph.add((feature, audiodb["nfft"], Literal(row['nfft'])))
+        graph.add((feature, audiodb["segn"], Literal(row['segn'])))
+        graph.add((feature, audiodb["channel"], Literal(row['channel'])))
+        graph.add((feature, audiodb["loedge"], Literal(row['loedge'])))
+        graph.add((feature, audiodb["hiedge"], Literal(row['hiedge'])))
+        graph.add((feature, audiodb["octaveres"], Literal(row['octaveres'])))
+
+        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])
+
+        project = buildNewProject(graph, row['software'])
+        graph.add((project, doap['release'], version))
+
+        graph.add((database, RDF.type, audiodb["Database"]))
+        graph.add((database, audiodb["feature"], feature))
+        graph.add((database, audiodb["extractor"], version))
+
+        counter += 1
+    graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/"+"features.rdf")
+
+def buildNewExtractor(graph, software, version, platform):
+    key = software+"_"+version+"_"+platform
+    try:
+        extractor = extractors[key]
+    except KeyError:
+        extractor = getNewNode('extractor')
+        graph.add((extractor, RDF.type, doap["Version"]))
+        graph.add((extractor, doap['version'], Literal(version)))
+        graph.add((extractor, doap['name'], Literal(software)))
+        graph.add((extractor, doap['os'], Literal(platform)))
+        extractors[key] = extractor
+    return extractor
+
+def buildNewProject(graph, software):
+    key = software
+    try:
+        project = projects[key]
+    except KeyError:
+        project = getNewNode('project')
+        graph.add((project, RDF.type, doap["Project"]))
+        graph.add((project, doap['name'], Literal(software)))
+        projects[key] = project
+    return project
+
+def createMediaGraphs(rows):
     albums = {}
 
     artists = {
         'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
         'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
         'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}
 
     counter = 1
     for row in rows:
-        graph = Graph(identifier = URIRef(default_graph_uri))
-
+        graph = Graph(identifier = URIRef(graph_uri))
         # Create all the relevant nodes (with the correct IDs)
 
         work = getNewNode('work')
         composition = getNewNode('composition')
         track = getNewNode('track')
-        signal = getNewNode('signal')
         record = getNewNode('record')
         performance = getNewNode('performance')
+        signal = Namespace(graph_uri+"/"+row['uid'])
 
         # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            try:
                artist = artists[row['artist']]
@@ -93,29 +183,35 @@
         if row['artist']:
             graph.add((track, foaf['maker'], artist))
         if row['tracknum']:
             graph.add((track, mo['track_number'], Literal(row['tracknum'])))
 
-        # Album
-        try:
-            album = albums[row['album']]
-        except KeyError:
-            album = getNewNode('album')
-            graph.add((album, RDF.type, mo['Record']))
-            graph.add((album, dc['title'], Literal(row['album'].strip())))
-            graph.add((album, mo['release_type'], mo['album']))
-            albums[row['album']] = album
-        graph.add((album, mo['track'], track))
+        if row['album']:
+            # Album
+            try:
+                album = albums[row['album']]
+            except KeyError:
+                album = getNewNode('album')
+                graph.add((album, RDF.type, mo['Record']))
+                graph.add((album, dc['title'], Literal(row['album'].strip())))
+                graph.add((album, mo['release_type'], mo['album']))
+                albums[row['album']] = album
+            graph.add((album, mo['track'], track))
 
         # Signal
         graph.add((signal, RDF.type, mo['Signal']))
         graph.add((signal, mo['published_as'], record))
 
         if row['track']:
             graph.add((signal, dc['title'], Literal(row['track'].strip())))
         if row['isrc']:
             graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
+
+        # Add to the various databases
+        dbs = databases[catalogueID]
+        for db in dbs:
+            graph.add((db, audiodb["has-signal"], signal))
 
         # Record
         graph.add((record, RDF.type, mo['Record']))
         graph.add((record, mo['publication_of'], signal))
         graph.add((record, mo['track'], track))
@@ -124,14 +220,13 @@
         graph.add((performance, RDF.type, mo['Performance']))
         graph.add((performance, mo['performance_of'], work))
         if row['artist']:
             graph.add((performance, mo['performer'], artist))
         graph.add((performance, mo['recorded_as'], signal))
 
-        #graph.close()
-
-        graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
+        graph.close()
+        graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
         counter += 1
 
 def getNewNode(type):
     try:
         count = counters[type]
@@ -140,13 +235,17 @@
         count = counters[type]
 
     try:
         namespace = namespaces[type]
     except KeyError:
-        namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")
+        namespaces[type] = Namespace(graph_uri+"/"+type+"/")
         namespace = namespaces[type]
 
     node = namespace[str(count)]
     counters[type] += 1
     return node
 
-createGraph(loadCatalogue(catalogue))
+features = loadFeatures(catalogueID)
+catalogue = loadCatalogue(catalogueID)
+
+createFeatureGraphs(features)
+createMediaGraphs(catalogue)
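A side note on the two cursor.execute calls above (their behaviour is unchanged by this revision): the catalogue identifier is spliced into the SQL by string concatenation. If one wanted psycopg2 to handle quoting instead, its standard parameter substitution would look roughly like the sketch below for loadFeatures; this is an alternative, not what the committed code does.

    def loadFeatures(catalogueID):
        # %s is a psycopg2 placeholder, not Python string formatting;
        # the driver escapes and quotes the bound value itself.
        cursor.execute("SELECT * FROM features WHERE catalogue LIKE %s", (catalogueID,))
        return cursor.fetchall()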