diff examples/browser/export/cat2rdf.py @ 652:1a251dd217c6

Ontology fine-tuned and included in the exporter.
author mas01mj
date Thu, 15 Oct 2009 14:24:36 +0000
parents 901803e1305f
--- a/examples/browser/export/cat2rdf.py	Wed Oct 14 12:12:39 2009 +0000
+++ b/examples/browser/export/cat2rdf.py	Thu Oct 15 14:24:36 2009 +0000
@@ -7,36 +7,127 @@
 from rdflib.Graph import ConjunctiveGraph as Graph
 from rdflib import Namespace, Literal, URIRef, BNode, RDF
 
-catalogue = sys.argv[1]
+catalogueID = sys.argv[1]
 
 foaf = Namespace("http://xmlns.com/foaf/0.1/")
 mo = Namespace("http://purl.org/ontology/mo/")
 mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
 dc = Namespace("http://purl.org/dc/elements/1.1/")
-default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()
+graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
+audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
+doap = Namespace("http://usefulinc.com/ns/doap#")
 
 username = "USERNAME"
-host = "HOST"
+host = "HOSTNAME"
 database = "DATABASE"
 
+
+try:
+	conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'")
+except psycopg2.Error:
+	print "Unable to connect to the database"
+	sys.exit(1)
+
+cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
+
 counters = {}
 namespaces = {}
+extractors = {}
+projects = {}
+databases = {}
 
-def loadCatalogue(catalogue):
-	try:
-		conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
-	except:
-		print "Unable to connect to the database"
-
-	cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
-
-	cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogue+"""'""")
-
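+# Fetch the feature rows recorded for this catalogue.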
+def loadFeatures(catalogueID):
+	cursor.execute("SELECT * FROM features WHERE catalogue LIKE %s", (catalogueID,))
 	rows = cursor.fetchall()
-
 	return rows
 
-def createGraph(rows):
+def loadCatalogue(catalogueID):
+	cursor.execute("SELECT * FROM media WHERE catalogue LIKE %s", (catalogueID,))
+	rows = cursor.fetchall()
+	return rows
+
+def createFeatureGraphs(rows):
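+	# Describe each feature row as an audiodb:Database with its feature,
+	# segmentation, window and extractor, then serialize everything to features.rdf.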
+	albums = {}
+
+	graph = Graph(identifier = URIRef(graph_uri))
+	counter = 1
+	databases[catalogueID] = []
+
+	for row in rows:
+	
+		# Create all the relevant nodes (with the correct IDs)
+		database = getNewNode('database')
+		databases[catalogueID].append(database)
+		feature = getNewNode('feature')
+		segmentation = getNewNode('segmentation')
+		window = getNewNode('window')
+
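+
+		# Map the feature, segmentation and window identifiers from the row onto audiodb ontology classes.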
+		if row['feature'] == "cqt":
+			graph.add((feature, RDF.type, audiodb['CQTFeature']))
+		elif row['feature'] == "chr":
+			graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
+		elif row['feature'] == "mfcc":
+			graph.add((feature, RDF.type, audiodb['MFCCFeature']))
+
+		if row['segtype'] == "frames":
+			graph.add((segmentation, RDF.type, audiodb['FrameSegmentation']))
+		elif row['segtype'] == "beats":
+			graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
+		elif row['segtype'] == "segs":
+			graph.add((segmentation, RDF.type, audiodb['StructuralSegmentation']))
+		
+		if row['windowtype'] == "hamming":
+			graph.add((window, RDF.type, audiodb['HammingWindow']))
+
+		graph.add((feature, audiodb["window"], window))
+		graph.add((feature, audiodb["segmentation"], segmentation))
+
+		graph.add((feature, audiodb["dimension"], Literal(row['dim'])))
+		graph.add((feature, audiodb["hop-size"], Literal(row['hopsize'])))
+		graph.add((feature, audiodb["window-length"], Literal(row['winlen'])))
+		graph.add((feature, audiodb["nfft"], Literal(row['nfft'])))
+		graph.add((feature, audiodb["segn"], Literal(row['segn'])))
+		graph.add((feature, audiodb["channel"], Literal(row['channel'])))
+		graph.add((feature, audiodb["loedge"], Literal(row['loedge'])))
+		graph.add((feature, audiodb["hiedge"], Literal(row['hiedge'])))
+		graph.add((feature, audiodb["octaveres"], Literal(row['octaveres'])))
+
+		version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])
+
+		project = buildNewProject(graph, row['software'])
+		graph.add((project, doap['release'], version))
+
+		graph.add((database, RDF.type, audiodb["Database"]))
+		graph.add((database, audiodb["feature"], feature))
+		graph.add((database, audiodb["extractor"], version))
+		
+		counter += 1
+	graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/"+"features.rdf")
+ 
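+# Return the doap:Version node for this software/version/platform combination, creating it on first use.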
+def buildNewExtractor(graph, software, version, platform):
+	key = software+"_"+version+"_"+platform
+	try:
+		extractor = extractors[key]
+	except KeyError:
+		extractor = getNewNode('extractor')
+		graph.add((extractor, RDF.type, doap["Version"]))
+		graph.add((extractor, doap['version'], Literal(version)))
+		graph.add((extractor, doap['name'], Literal(software)))
+		graph.add((extractor, doap['os'], Literal(platform)))
+		extractors[key] = extractor
+	return extractor
+
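+# Return the doap:Project node for this software package, creating it on first use.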
+def buildNewProject(graph, software):
+	key = software
+	try:
+		project = projects[key]
+	except KeyError:
+		project = getNewNode('project')
+		graph.add((project, RDF.type, doap["Project"]))
+		graph.add((project, doap['name'], Literal(software)))
+		projects[key] = project
+	return project
+
+def createMediaGraphs(rows):
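+	# Write one RDF graph per media row, linking work, composition, track, record,
+	# performance and signal nodes, and attach each signal to the catalogue's feature databases.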
 	albums = {}
 
 	artists = {
@@ -46,16 +137,15 @@
 
 	counter = 1
 	for row in rows:
-		graph = Graph(identifier = URIRef(default_graph_uri))
-		
+		graph = Graph(identifier = URIRef(graph_uri))
 		# Create all the relevant nodes (with the correct IDs)
 
 		work = getNewNode('work')
 		composition = getNewNode('composition')
 		track = getNewNode('track')
-		signal = getNewNode('signal')
 		record = getNewNode('record')
 		performance = getNewNode('performance')
+		signal = Namespace(graph_uri+"/"+row['uid'])
 
 		# If we don't have an artist url, make a foaf Agent instead.
 		if row['artist']:
@@ -95,16 +185,17 @@
 		if row['tracknum']:
 			graph.add((track, mo['track_number'], Literal(row['tracknum'])))
 
-		# Album
-		try:
-			album = albums[row['album']]
-		except KeyError:
-			album = getNewNode('album')
-			graph.add((album, RDF.type, mo['Record']))
-			graph.add((album, dc['title'], Literal(row['album'].strip())))
-			graph.add((album, mo['release_type'], mo['album']))
-			albums[row['album']] = album
-		graph.add((album, mo['track'], track))
+		if row['album']:
+			# Album
+			try:
+				album = albums[row['album']]
+			except KeyError:
+				album = getNewNode('album')
+				graph.add((album, RDF.type, mo['Record']))
+				graph.add((album, dc['title'], Literal(row['album'].strip())))
+				graph.add((album, mo['release_type'], mo['album']))
+				albums[row['album']] = album
+			graph.add((album, mo['track'], track))
 
 		# Signal
 		graph.add((signal, RDF.type, mo['Signal']))
@@ -115,6 +206,11 @@
 		if row['isrc']:
 			graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
 
+		# Add to the various databases
+		dbs = databases[catalogueID]
+		for db in dbs:
+			graph.add((db, audiodb["has-signal"], signal))
+
 		# Record
 		graph.add((record, RDF.type, mo['Record']))
 		graph.add((record, mo['publication_of'], signal))
@@ -126,10 +222,9 @@
 		if row['artist']:
 			graph.add((performance, mo['performer'], artist))
 		graph.add((performance, mo['recorded_as'], signal))
-
-		#graph.close()
 		
-		graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
+		graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
+		graph.close()
 		counter += 1
  
 def getNewNode(type):
@@ -142,11 +237,15 @@
 	try:
 		namespace = namespaces[type]
 	except KeyError:
-		namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")
+		namespaces[type] = Namespace(graph_uri+"/"+type+"/")
 		namespace = namespaces[type]
 
 	node = namespace[str(count)]
 	counters[type] += 1
 	return node
 
-createGraph(loadCatalogue(catalogue))
+features = loadFeatures(catalogueID)
+catalogue = loadCatalogue(catalogueID)
+
+createFeatureGraphs(features)
+createMediaGraphs(catalogue)