audiodb: comparison of examples/browser/export/cat2rdf.py @ 652:1a251dd217c6
Ontology fine-tuned and included in exporter.
author   | mas01mj
date     | Thu, 15 Oct 2009 14:24:36 +0000
parents  | 901803e1305f
children |
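For orientation (this note is not part of the changeset): the new createFeatureGraphs path added below describes each audioDB database with terms from the audiodb and doap vocabularies. The following sketch shows the shape of graph it builds for one entirely hypothetical features row, written against the same rdflib 2.x / Python 2 API the script imports; the node URIs, feature values and extractor name are illustrative only.

    from rdflib.Graph import ConjunctiveGraph as Graph  # rdflib 2.x API, as used by cat2rdf.py
    from rdflib import Namespace, Literal, URIRef, RDF

    audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
    doap = Namespace("http://usefulinc.com/ns/doap#")
    base = "http://omras2.gold.ac.uk/catalogue/example"  # illustrative catalogue URI

    graph = Graph(identifier=URIRef(base))

    # Hypothetical row: a 12-bin chromagram, beat-segmented, produced by "someExtractor 0.9"
    database = URIRef(base + "/database/1")
    feature = URIRef(base + "/feature/1")
    segmentation = URIRef(base + "/segmentation/1")
    extractor = URIRef(base + "/extractor/1")

    graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
    graph.add((feature, audiodb['dimension'], Literal(12)))
    graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
    graph.add((feature, audiodb['segmentation'], segmentation))

    graph.add((extractor, RDF.type, doap['Version']))
    graph.add((extractor, doap['name'], Literal("someExtractor")))
    graph.add((extractor, doap['version'], Literal("0.9")))

    graph.add((database, RDF.type, audiodb['Database']))
    graph.add((database, audiodb['feature'], feature))
    graph.add((database, audiodb['extractor'], extractor))

    print graph.serialize(format='xml')  # Python 2 print statement, matching the script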
--- examples/browser/export/cat2rdf.py (651:5b2c0d9bc63d)
+++ examples/browser/export/cat2rdf.py (652:1a251dd217c6)
@@ -5,59 +5,149 @@
 import psycopg2.extras
 
 from rdflib.Graph import ConjunctiveGraph as Graph
 from rdflib import Namespace, Literal, URIRef, BNode, RDF
 
-catalogue = sys.argv[1]
+catalogueID = sys.argv[1]
 
 foaf = Namespace("http://xmlns.com/foaf/0.1/")
 mo = Namespace("http://purl.org/ontology/mo/")
 mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
 dc = Namespace("http://purl.org/dc/elements/1.1/")
-default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()
+graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
+audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
+doap = Namespace("http://usefulinc.com/ns/doap#")
 
 username = "USERNAME"
-host = "HOST"
+host = "HOSTNAME"
 database = "DATABASE"
+
+
+try:
+    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
+except:
+    print "Unable to connect to the database"
+
+cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
 
 counters = {}
 namespaces = {}
-
-def loadCatalogue(catalogue):
-    try:
-        conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
-    except:
-        print "Unable to connect to the database"
-
-    cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
-
-    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogue+"""'""")
-
+extractors = {}
+projects = {}
+databases = {}
+
+def loadFeatures(catalogueID):
+    cursor.execute("""SELECT * from features WHERE catalogue LIKE '"""+catalogueID+"""'""")
     rows = cursor.fetchall()
-
     return rows
 
-def createGraph(rows):
+def loadCatalogue(catalogueID):
+    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogueID+"""'""")
+    rows = cursor.fetchall()
+    return rows
+
+def createFeatureGraphs(rows):
+    albums = {}
+
+    graph = Graph(identifier = URIRef(graph_uri))
+    counter = 1
+    databases[catalogueID] = []
+
+    for row in rows:
+
+        # Create all the relevant nodes (with the correct IDs)
+        database = getNewNode('database')
+        databases[catalogueID].append(database)
+        feature = getNewNode('feature')
+        segmentation = getNewNode('segmentation')
+        window = getNewNode('window')
+
+        if row['feature'] == "cqt":
+            graph.add((feature, RDF.type, audiodb['CQTFeature']))
+        elif row['feature'] == "chr":
+            graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
+        elif row['feature'] == "mfcc":
+            graph.add((feature, RDF.type, audiodb['MFCCFeature']))
+
+        if row['segtype'] == "frames":
+            graph.add((segmentation, RDF.type, audiodb['FrameSegmentation']))
+        elif row['segtype'] == "beats":
+            graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
+        elif row['segtype'] == "segs":
+            graph.add((segmentation, RDF.type, audiodb['StructuralSegmentation']))
+
+        if row['windowtype'] == "hamming":
+            graph.add((window, RDF.type, audiodb['HammingWindow']))
+
+        graph.add((feature, audiodb["window"], window))
+        graph.add((feature, audiodb["segmentation"], segmentation))
+
+        graph.add((feature, audiodb["dimension"], Literal(row['dim'])))
+        graph.add((feature, audiodb["hop-size"], Literal(row['hopsize'])))
+        graph.add((feature, audiodb["window-length"], Literal(row['winlen'])))
+        graph.add((feature, audiodb["nfft"], Literal(row['nfft'])))
+        graph.add((feature, audiodb["segn"], Literal(row['segn'])))
+        graph.add((feature, audiodb["channel"], Literal(row['channel'])))
+        graph.add((feature, audiodb["loedge"], Literal(row['loedge'])))
+        graph.add((feature, audiodb["hiedge"], Literal(row['hiedge'])))
+        graph.add((feature, audiodb["octaveres"], Literal(row['octaveres'])))
+
+        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])
+
+        project = buildNewProject(graph, row['software'])
+        graph.add((project, doap['release'], version))
+
+        graph.add((database, RDF.type, audiodb["Database"]))
+        graph.add((database, audiodb["feature"], feature))
+        graph.add((database, audiodb["extractor"], version))
+
+        counter += 1
+    graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/"+"features.rdf")
+
+def buildNewExtractor(graph, software, version, platform):
+    key = software+"_"+version+"_"+platform
+    try:
+        extractor = extractors[key]
+    except KeyError:
+        extractor = getNewNode('extractor')
+        graph.add((extractor, RDF.type, doap["Version"]))
+        graph.add((extractor, doap['version'], Literal(version)))
+        graph.add((extractor, doap['name'], Literal(software)))
+        graph.add((extractor, doap['os'], Literal(platform)))
+        extractors[key] = extractor
+    return extractor
+
+def buildNewProject(graph, software):
+    key = software
+    try:
+        project = projects[key]
+    except KeyError:
+        project = getNewNode('project')
+        graph.add((project, RDF.type, doap["Project"]))
+        graph.add((project, doap['name'], Literal(software)))
+        projects[key] = project
+    return project
+
+def createMediaGraphs(rows):
     albums = {}
 
     artists = {
         'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
         'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
         'Miles Davis' : mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}
 
     counter = 1
     for row in rows:
-        graph = Graph(identifier = URIRef(default_graph_uri))
-
+        graph = Graph(identifier = URIRef(graph_uri))
         # Create all the relevant nodes (with the correct IDs)
 
         work = getNewNode('work')
         composition = getNewNode('composition')
         track = getNewNode('track')
-        signal = getNewNode('signal')
         record = getNewNode('record')
         performance = getNewNode('performance')
+        signal = Namespace(graph_uri+"/"+row['uid'])
 
         # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            try:
                artist = artists[row['artist']]
@@ -93,29 +183,35 @@
         if row['artist']:
             graph.add((track, foaf['maker'], artist))
         if row['tracknum']:
             graph.add((track, mo['track_number'], Literal(row['tracknum'])))
 
-        # Album
-        try:
-            album = albums[row['album']]
-        except KeyError:
-            album = getNewNode('album')
-            graph.add((album, RDF.type, mo['Record']))
-            graph.add((album, dc['title'], Literal(row['album'].strip())))
-            graph.add((album, mo['release_type'], mo['album']))
-            albums[row['album']] = album
-        graph.add((album, mo['track'], track))
+        if row['album']:
+            # Album
+            try:
+                album = albums[row['album']]
+            except KeyError:
+                album = getNewNode('album')
+                graph.add((album, RDF.type, mo['Record']))
+                graph.add((album, dc['title'], Literal(row['album'].strip())))
+                graph.add((album, mo['release_type'], mo['album']))
+                albums[row['album']] = album
+            graph.add((album, mo['track'], track))
 
         # Signal
         graph.add((signal, RDF.type, mo['Signal']))
         graph.add((signal, mo['published_as'], record))
 
         if row['track']:
             graph.add((signal, dc['title'], Literal(row['track'].strip())))
         if row['isrc']:
             graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
+
+        # Add to the various databases
+        dbs = databases[catalogueID]
+        for db in dbs:
+            graph.add((db, audiodb["has-signal"], signal))
 
         # Record
         graph.add((record, RDF.type, mo['Record']))
         graph.add((record, mo['publication_of'], signal))
         graph.add((record, mo['track'], track))
@@ -124,14 +220,13 @@
         graph.add((performance, RDF.type, mo['Performance']))
         graph.add((performance, mo['performance_of'], work))
         if row['artist']:
             graph.add((performance, mo['performer'], artist))
         graph.add((performance, mo['recorded_as'], signal))
 
-        #graph.close()
-
-        graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
+        graph.close()
+        graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
         counter += 1
 
 def getNewNode(type):
     try:
         count = counters[type]
@@ -140,13 +235,17 @@
         count = counters[type]
 
     try:
         namespace = namespaces[type]
     except KeyError:
-        namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")
+        namespaces[type] = Namespace(graph_uri+"/"+type+"/")
         namespace = namespaces[type]
 
     node = namespace[str(count)]
     counters[type] += 1
     return node
 
-createGraph(loadCatalogue(catalogue))
+features = loadFeatures(catalogueID)
+catalogue = loadCatalogue(catalogueID)
+
+createFeatureGraphs(features)
+createMediaGraphs(catalogue)
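A side note on the two cursor.execute calls above (their behaviour is unchanged by this revision): the catalogue identifier is spliced into the SQL by string concatenation. If one wanted psycopg2 to handle quoting instead, its standard parameter substitution would look roughly like the sketch below for loadFeatures; this is an alternative, not what the committed code does.

    def loadFeatures(catalogueID):
        # %s is a psycopg2 placeholder, not Python string formatting;
        # the driver escapes and quotes the bound value itself.
        cursor.execute("SELECT * FROM features WHERE catalogue LIKE %s", (catalogueID,))
        return cursor.fetchall()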