Mercurial > hg > audiodb
diff examples/browser/export/cat2rdf.py @ 652:1a251dd217c6
Ontology fine-tuned and included in exporter.
| author | mas01mj |
|---|---|
| date | Thu, 15 Oct 2009 14:24:36 +0000 |
| parents | 901803e1305f |
| children | |
line wrap: on
line diff
```diff
--- a/examples/browser/export/cat2rdf.py Wed Oct 14 12:12:39 2009 +0000
+++ b/examples/browser/export/cat2rdf.py Thu Oct 15 14:24:36 2009 +0000
@@ -7,36 +7,127 @@
 from rdflib.Graph import ConjunctiveGraph as Graph
 from rdflib import Namespace, Literal, URIRef, BNode, RDF
 
-catalogue = sys.argv[1]
+catalogueID = sys.argv[1]
 
 foaf = Namespace("http://xmlns.com/foaf/0.1/")
 mo = Namespace("http://purl.org/ontology/mo/")
 mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
 dc = Namespace("http://purl.org/dc/elements/1.1/")
-default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower()
+graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
+audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
+doap = Namespace("http://usefulinc.com/ns/doap#")
 
 username = "USERNAME"
-host = "HOST"
+host = "HOSTNAME"
 database = "DATABASE"
+
+try:
+    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
+except:
+    print "Unable to connect to the database"
+
+cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
+
 
 counters = {}
 namespaces = {}
+extractors = {}
+projects = {}
+databases = {}
 
-def loadCatalogue(catalogue):
-    try:
-        conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'");
-    except:
-        print "Unable to connect to the database"
-
-    cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
-
-    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogue+"""'""")
-
+def loadFeatures(catalogueID):
+    cursor.execute("""SELECT * from features WHERE catalogue LIKE '"""+catalogueID+"""'""")
     rows = cursor.fetchall()
-
     return rows
 
-def createGraph(rows):
+def loadCatalogue(catalogueID):
+    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogueID+"""'""")
+    rows = cursor.fetchall()
+    return rows
+
+def createFeatureGraphs(rows):
+    albums = {}
+
+    graph = Graph(identifier = URIRef(graph_uri))
+    counter = 1
+    databases[catalogueID] = []
+
+    for row in rows:
+
+        # Create all the relevant nodes (with the correct IDs)
+        database = getNewNode('database')
+        databases[catalogueID].append(database)
+        feature = getNewNode('feature')
+        segmentation = getNewNode('segmentation')
+        window = getNewNode('window')
+
+        if row['feature'] == "cqt":
+            graph.add((feature, RDF.type, audiodb['CQTFeature']))
+        elif row['feature'] == "chr":
+            graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
+        elif row['feature'] == "mfcc":
+            graph.add((feature, RDF.type, audiodb['MFCCFeature']))
+
+        if row['segtype'] == "frames":
+            graph.add((segmentation, RDF.type, audiodb['FrameSegmentation']))
+        elif row['segtype'] == "beats":
+            graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
+        elif row['segtype'] == "segs":
+            graph.add((segmentation, RDF.type, audiodb['StructuralSegmentation']))
+
+        if row['windowtype'] == "hamming":
+            graph.add((window, RDF.type, audiodb['HammingWindow']))
+
+        graph.add((feature, audiodb["window"], window))
+        graph.add((feature, audiodb["segmentation"], segmentation))
+
+        graph.add((feature, audiodb["dimension"], Literal(row['dim'])))
+        graph.add((feature, audiodb["hop-size"], Literal(row['hopsize'])))
+        graph.add((feature, audiodb["window-length"], Literal(row['winlen'])))
+        graph.add((feature, audiodb["nfft"], Literal(row['nfft'])))
+        graph.add((feature, audiodb["segn"], Literal(row['segn'])))
+        graph.add((feature, audiodb["channel"], Literal(row['channel'])))
+        graph.add((feature, audiodb["loedge"], Literal(row['loedge'])))
+        graph.add((feature, audiodb["hiedge"], Literal(row['hiedge'])))
+        graph.add((feature, audiodb["octaveres"], Literal(row['octaveres'])))
+
+        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])
+
+        project = buildNewProject(graph, row['software'])
+        graph.add((project, doap['release'], version))
+
+        graph.add((database, RDF.type, audiodb["Database"]))
+        graph.add((database, audiodb["feature"], feature))
+        graph.add((database, audiodb["extractor"], version))
+
+        counter += 1
+    graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/"+"features.rdf")
+
+def buildNewExtractor(graph, software, version, platform):
+    key = software+"_"+version+"_"+platform
+    try:
+        extractor = extractors[key]
+    except KeyError:
+        extractor = getNewNode('extractor')
+        graph.add((extractor, RDF.type, doap["Version"]))
+        graph.add((extractor, doap['version'], Literal(version)))
+        graph.add((extractor, doap['name'], Literal(software)))
+        graph.add((extractor, doap['os'], Literal(platform)))
+        extractors[key] = extractor
+    return extractor
+
+def buildNewProject(graph, software):
+    key = software
+    try:
+        project = projects[key]
+    except KeyError:
+        project = getNewNode('project')
+        graph.add((project, RDF.type, doap["Project"]))
+        graph.add((project, doap['name'], Literal(software)))
+        projects[key] = project
+    return project
+
+def createMediaGraphs(rows):
     albums = {}
 
     artists = {
@@ -46,16 +137,15 @@
     counter = 1
 
     for row in rows:
-        graph = Graph(identifier = URIRef(default_graph_uri))
-
+        graph = Graph(identifier = URIRef(graph_uri))
 
         # Create all the relevant nodes (with the correct IDs)
         work = getNewNode('work')
         composition = getNewNode('composition')
         track = getNewNode('track')
-        signal = getNewNode('signal')
         record = getNewNode('record')
         performance = getNewNode('performance')
+        signal = Namespace(graph_uri+"/"+row['uid'])
 
         # If we don't have an artist url, make a foaf Agent instead.
         if row['artist']:
@@ -95,16 +185,17 @@
         if row['tracknum']:
             graph.add((track, mo['track_number'], Literal(row['tracknum'])))
 
-        # Album
-        try:
-            album = albums[row['album']]
-        except KeyError:
-            album = getNewNode('album')
-            graph.add((album, RDF.type, mo['Record']))
-            graph.add((album, dc['title'], Literal(row['album'].strip())))
-            graph.add((album, mo['release_type'], mo['album']))
-            albums[row['album']] = album
-        graph.add((album, mo['track'], track))
+        if row['album']:
+            # Album
+            try:
+                album = albums[row['album']]
+            except KeyError:
+                album = getNewNode('album')
+                graph.add((album, RDF.type, mo['Record']))
+                graph.add((album, dc['title'], Literal(row['album'].strip())))
+                graph.add((album, mo['release_type'], mo['album']))
+                albums[row['album']] = album
+            graph.add((album, mo['track'], track))
 
         # Signal
         graph.add((signal, RDF.type, mo['Signal']))
@@ -115,6 +206,11 @@
         if row['isrc']:
             graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
 
+        # Add to the various databases
+        dbs = databases[catalogueID]
+        for db in dbs:
+            graph.add((db, audiodb["has-signal"], signal))
+
         # Record
         graph.add((record, RDF.type, mo['Record']))
         graph.add((record, mo['publication_of'], signal))
@@ -126,10 +222,9 @@
         if row['artist']:
             graph.add((performance, mo['performer'], artist))
         graph.add((performance, mo['recorded_as'], signal))
-
 
-        #graph.close()
-        graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf")
+        graph.close()
+        graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
         counter += 1
 
 def getNewNode(type):
@@ -142,11 +237,15 @@
     try:
         namespace = namespaces[type]
     except KeyError:
-        namespaces[type] = Namespace(default_graph_uri+"/"+type+"/")
+        namespaces[type] = Namespace(graph_uri+"/"+type+"/")
         namespace = namespaces[type]
 
     node = namespace[str(count)]
     counters[type] += 1
     return node
 
-createGraph(loadCatalogue(catalogue))
+features = loadFeatures(catalogueID)
+catalogue = loadCatalogue(catalogueID)
+
+createFeatureGraphs(features)
+createMediaGraphs(catalogue)
```