annotate examples/browser/export/cat2rdf.py @ 770:c54bc2ffbf92 tip

update tags
author convert-repo
date Fri, 16 Dec 2011 11:34:01 +0000
parents 1a251dd217c6
children
rev   line source
mas01mj@640 1 #!/usr/bin/python
mas01mj@640 2
mas01mj@640 3 import sys
mas01mj@640 4 import psycopg2
mas01mj@640 5 import psycopg2.extras
mas01mj@640 6
mas01mj@640 7 from rdflib.Graph import ConjunctiveGraph as Graph
mas01mj@640 8 from rdflib import Namespace, Literal, URIRef, BNode, RDF
mas01mj@640 9
# The catalogue to export is the first (and only) command-line argument.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: " + sys.argv[0] + " <catalogueID>\n")
    sys.exit(1)
catalogueID = sys.argv[1]

# RDF vocabularies used while building the graphs.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
# Base URI for this catalogue; also used as the identifier of every graph
# and as the prefix of every node minted by getNewNode().
graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
doap = Namespace("http://usefulinc.com/ns/doap#")

# PostgreSQL connection settings.
# NOTE(review): these values look like placeholders that must be replaced
# with real settings before the script can connect.
username = "USERNAME"
host = "HOSTNAME"
database = "DATABASE"


try:
    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'")
except psycopg2.Error as err:
    # Everything below needs a live connection, so report the failure and
    # stop here instead of continuing into a NameError on `conn`.
    sys.stderr.write("Unable to connect to the database: " + str(err) + "\n")
    sys.exit(1)

# DictCursor lets rows be indexed by column name (row['feature'], ...).
cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Module-level registries shared by the functions below.
counters = {}    # node type -> next serial number handed out by getNewNode()
namespaces = {}  # node type -> Namespace under graph_uri for that type
extractors = {}  # "software_version_platform" -> extractor node
projects = {}    # software name -> project node
databases = {}   # catalogueID -> list of database nodes
mas01mj@640 37
def loadFeatures(catalogueID):
    """Return every row of the ``features`` table for the given catalogue.

    catalogueID is matched with SQL ``LIKE``, so any ``%`` or ``_`` it
    contains acts as a wildcard. The query runs on the module-level
    ``cursor`` and the result of ``fetchall()`` is returned unchanged.
    """
    # Bind the value as a query parameter instead of splicing it into the
    # SQL text: the value comes from the command line, and a quote in it
    # would otherwise break (or rewrite) the statement.
    cursor.execute("SELECT * from features WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()
mas01mj@640 42
def loadCatalogue(catalogueID):
    """Return every row of the ``media`` table for the given catalogue.

    catalogueID is matched with SQL ``LIKE``, so any ``%`` or ``_`` it
    contains acts as a wildcard. The query runs on the module-level
    ``cursor`` and the result of ``fetchall()`` is returned unchanged.
    """
    # Bind the value as a query parameter instead of splicing it into the
    # SQL text: the value comes from the command line, and a quote in it
    # would otherwise break (or rewrite) the statement.
    cursor.execute("SELECT * from media WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()
mas01mj@652 47
def createFeatureGraphs(rows):
    """Describe every feature row in one RDF graph and write it to disk.

    rows is a sequence of feature records indexable by column name (as
    returned by loadFeatures). For each row a database, feature,
    segmentation and window node are minted, typed and linked, the
    extractor version and its project are attached, and the database node
    is remembered in ``databases[catalogueID]`` so that createMediaGraphs
    can later link signals to it.

    The graph is serialized as RDF/XML to
    ``output/<catalogueID lower-cased>/features.rdf``; the directory is
    created if it does not exist yet.
    """
    import os

    # Column value -> audiodb class name. Values not listed here leave the
    # corresponding node untyped.
    featureTypes = {
        "cqt": 'CQTFeature',
        "chr": 'ChromagramFeature',
        "mfcc": 'MFCCFeature',
    }
    segmentationTypes = {
        "frames": 'FrameSegmentation',
        "beats": 'BeatSegmentation',
        "segs": 'StructuralSegmentation',
    }
    windowTypes = {
        "hamming": 'HammingWindow',
    }
    # (audiodb predicate, row column) pairs copied verbatim as literals.
    literalProperties = [
        ("dimension", 'dim'),
        ("hop-size", 'hopsize'),
        ("window-length", 'winlen'),
        ("nfft", 'nfft'),
        ("segn", 'segn'),
        ("channel", 'channel'),
        ("loedge", 'loedge'),
        ("hiedge", 'hiedge'),
        ("octaveres", 'octaveres'),
    ]

    graph = Graph(identifier = URIRef(graph_uri))
    databases[catalogueID] = []

    for row in rows:

        # Create all the relevant nodes (with the correct IDs)
        dbNode = getNewNode('database')
        databases[catalogueID].append(dbNode)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')

        if row['feature'] in featureTypes:
            graph.add((feature, RDF.type, audiodb[featureTypes[row['feature']]]))

        if row['segtype'] in segmentationTypes:
            graph.add((segmentation, RDF.type, audiodb[segmentationTypes[row['segtype']]]))

        if row['windowtype'] in windowTypes:
            graph.add((window, RDF.type, audiodb[windowTypes[row['windowtype']]]))

        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))

        for predicate, column in literalProperties:
            graph.add((feature, audiodb[predicate], Literal(row[column])))

        # Extractor versions and projects are shared between rows that name
        # the same software, so these may return already-existing nodes.
        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])

        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))

        graph.add((dbNode, RDF.type, audiodb["Database"]))
        graph.add((dbNode, audiodb["feature"], feature))
        graph.add((dbNode, audiodb["extractor"], version))

    # serialize() is handed a path inside this directory, so make sure it
    # exists before writing.
    outputDir = "output/"+catalogueID.lower()
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)
    graph.serialize(format='xml',destination=outputDir+"/"+"features.rdf")
mas01mj@652 105
def buildNewExtractor(graph, software, version, platform):
    """Return the doap:Version node for a software/version/platform triple.

    Nodes are cached in the module-level ``extractors`` dict under the key
    ``software_version_platform``. On first use a new 'extractor' node is
    minted and its type, version, name and os triples are added to graph;
    on later calls the cached node is returned and graph is left untouched.
    """
    cacheKey = "_".join([software, version, platform])
    if cacheKey in extractors:
        return extractors[cacheKey]

    node = getNewNode('extractor')
    graph.add((node, RDF.type, doap["Version"]))
    graph.add((node, doap['version'], Literal(version)))
    graph.add((node, doap['name'], Literal(software)))
    graph.add((node, doap['os'], Literal(platform)))
    extractors[cacheKey] = node
    return node
mas01mj@652 118
def buildNewProject(graph, software):
    """Return the doap:Project node for the named software.

    Nodes are cached in the module-level ``projects`` dict, keyed by the
    software name. On first use a new 'project' node is minted and its type
    and name triples are added to graph; on later calls the cached node is
    returned and graph is left untouched.
    """
    if software in projects:
        return projects[software]

    node = getNewNode('project')
    graph.add((node, RDF.type, doap["Project"]))
    graph.add((node, doap['name'], Literal(software)))
    projects[software] = node
    return node
mas01mj@652 129
def _getAgentNode(graph, agents, name):
    """Return the node for the agent called name, minting a foaf:Agent on first use.

    agents maps raw (unstripped) names to nodes and is updated in place.
    The type and name triples are only added to graph when the node is
    newly created.
    """
    try:
        return agents[name]
    except KeyError:
        node = getNewNode('artist')
        graph.add((node, RDF.type, foaf['Agent']))
        graph.add((node, foaf['name'], Literal(name.strip())))
        agents[name] = node
        return node


def createMediaGraphs(rows):
    """Write one RDF/XML file per media row describing the track.

    rows is a sequence of media records indexable by column name (as
    returned by loadCatalogue). For each row a fresh graph is built holding
    the work, composition, track, record, performance and signal nodes, and
    the signal is attached to every database node registered for this
    catalogue in ``databases``. Each graph is serialized to
    ``output/<catalogueID lower-cased>/media_<n>.rdf`` with n counting from
    1; the directory is created if it does not exist yet.

    Artist and album nodes are remembered across rows, so their descriptive
    triples appear only in the file of the row that first mentions them.
    """
    import os

    albums = {}

    # Artists with a known MusicBrainz URI; anyone else gets a locally
    # minted foaf:Agent node the first time they are seen.
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}

    # serialize() is handed paths inside this directory, so make sure it
    # exists before writing.
    outputDir = "output/"+catalogueID.lower()
    if not os.path.isdir(outputDir):
        os.makedirs(outputDir)

    # Database nodes are registered by createFeatureGraphs; if it has not
    # run for this catalogue there is simply nothing to link the signals to.
    dbs = databases.get(catalogueID, [])

    for index, row in enumerate(rows):
        graph = Graph(identifier = URIRef(graph_uri))

        # Create all the relevant nodes (with the correct IDs)
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        record = getNewNode('record')
        performance = getNewNode('performance')
        signal = Namespace(graph_uri+"/"+row['uid'])

        # Reset per row: without this, a row with no artist would either
        # hit an unbound name or silently reuse the previous row's artist.
        artist = None
        # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            artist = _getAgentNode(graph, artists, row['artist'])

        if row['composer']:
            composer = _getAgentNode(graph, artists, row['composer'])
        else:
            # No composer recorded: fall back to the artist (may be None).
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if row['artist']:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        if row['album']:
            # Album
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))

        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Add to the various databases
        for db in dbs:
            graph.add((db, audiodb["has-signal"], signal))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if row['artist']:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        # Write the file first and close the graph afterwards, so the graph
        # is still open while it is being read for serialization.
        graph.serialize(format='xml',destination=outputDir+"/media_"+str(index + 1)+".rdf")
        graph.close()
mas01mj@640 229
def getNewNode(type):
    """Mint the next node of the given type under this catalogue's URI.

    Nodes are named ``<graph_uri>/<type>/<n>`` where n starts at 1 and
    increases by one per call for that type. The running number lives in
    the module-level ``counters`` dict and the per-type Namespace in
    ``namespaces``; both are created on first use of a type.
    """
    serial = counters.setdefault(type, 1)

    if type not in namespaces:
        namespaces[type] = Namespace(graph_uri+"/"+type+"/")

    node = namespaces[type][str(serial)]
    # Advance the counter only once the node has been built.
    counters[type] = serial + 1
    return node
mas01mj@640 246
# Run the export: load both tables, then build the graphs. The feature
# graph goes first because it registers the database nodes that the media
# graphs link their signals to.
try:
    features = loadFeatures(catalogueID)
    catalogue = loadCatalogue(catalogueID)

    createFeatureGraphs(features)
    createMediaGraphs(catalogue)
finally:
    # Release the database resources whether or not the export succeeded.
    cursor.close()
    conn.close()