Mercurial > hg > audiodb
changeset 652:1a251dd217c6
Ontology fine-tuned and included in exporter.
author | mas01mj |
---|---|
date | Thu, 15 Oct 2009 14:24:36 +0000 |
parents | 5b2c0d9bc63d |
children | 3e25f26b6a27 |
files | examples/browser/export/cat2rdf.py examples/browser/ontology/audiodb.owl |
diffstat | 2 files changed, 147 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/examples/browser/export/cat2rdf.py Wed Oct 14 12:12:39 2009 +0000 +++ b/examples/browser/export/cat2rdf.py Thu Oct 15 14:24:36 2009 +0000 @@ -7,36 +7,127 @@ from rdflib.Graph import ConjunctiveGraph as Graph from rdflib import Namespace, Literal, URIRef, BNode, RDF -catalogue = sys.argv[1] +catalogueID = sys.argv[1] foaf = Namespace("http://xmlns.com/foaf/0.1/") mo = Namespace("http://purl.org/ontology/mo/") mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/") dc = Namespace("http://purl.org/dc/elements/1.1/") -default_graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogue.lower() +graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower() +audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#") +doap = Namespace("http://usefulinc.com/ns/doap#") username = "USERNAME" -host = "HOST" +host = "HOSTNAME" database = "DATABASE" + +try: + conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'"); +except: + print "Unable to connect to the database" + +cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) + counters = {} namespaces = {} +extractors = {} +projects = {} +databases = {} -def loadCatalogue(catalogue): - try: - conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'"); - except: - print "Unable to connect to the database" - - cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor) - - cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogue+"""'""") - +def loadFeatures(catalogueID): + cursor.execute("""SELECT * from features WHERE catalogue LIKE '"""+catalogueID+"""'""") rows = cursor.fetchall() - return rows -def createGraph(rows): +def loadCatalogue(catalogueID): + cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogueID+"""'""") + rows = cursor.fetchall() + return rows + +def createFeatureGraphs(rows): + albums = {} + + graph = Graph(identifier = URIRef(graph_uri)) + counter = 1 + 
databases[catalogueID] = [] + + for row in rows: + + # Create all the relevant nodes (with the correct IDs) + database = getNewNode('database') + databases[catalogueID].append(database) + feature = getNewNode('feature') + segmentation = getNewNode('segmentation') + window = getNewNode('window') + + if row['feature'] == "cqt": + graph.add((feature, RDF.type, audiodb['CQTFeature'])) + elif row['feature'] == "chr": + graph.add((feature, RDF.type, audiodb['ChromagramFeature'])) + elif row['feature'] == "mfcc": + graph.add((feature, RDF.type, audiodb['MFCCFeature'])) + + if row['segtype'] == "frames": + graph.add((segmentation, RDF.type, audiodb['FrameSegmentation'])) + elif row['segtype'] == "beats": + graph.add((segmentation, RDF.type, audiodb['BeatSegmentation'])) + elif row['segtype'] == "segs": + graph.add((segmentation, RDF.type, audiodb['StructuralSegmentation'])) + + if row['windowtype'] == "hamming": + graph.add((window, RDF.type, audiodb['HammingWindow'])) + + graph.add((feature, audiodb["window"], window)) + graph.add((feature, audiodb["segmentation"], segmentation)) + + graph.add((feature, audiodb["dimension"], Literal(row['dim']))) + graph.add((feature, audiodb["hop-size"], Literal(row['hopsize']))) + graph.add((feature, audiodb["window-length"], Literal(row['winlen']))) + graph.add((feature, audiodb["nfft"], Literal(row['nfft']))) + graph.add((feature, audiodb["segn"], Literal(row['segn']))) + graph.add((feature, audiodb["channel"], Literal(row['channel']))) + graph.add((feature, audiodb["loedge"], Literal(row['loedge']))) + graph.add((feature, audiodb["hiedge"], Literal(row['hiedge']))) + graph.add((feature, audiodb["octaveres"], Literal(row['octaveres']))) + + version = buildNewExtractor(graph, row['software'], row['version'], row['platform']) + + project = buildNewProject(graph, row['software']) + graph.add((project, doap['release'], version)) + + graph.add((database, RDF.type, audiodb["Database"])) + graph.add((database, audiodb["feature"], feature)) + 
graph.add((database, audiodb["extractor"], version)) + + counter += 1 + graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/"+"features.rdf") + +def buildNewExtractor(graph, software, version, platform): + key = software+"_"+version+"_"+platform + try: + extractor = extractors[key] + except KeyError: + extractor = getNewNode('extractor') + graph.add((extractor, RDF.type, doap["Version"])) + graph.add((extractor, doap['version'], Literal(version))) + graph.add((extractor, doap['name'], Literal(software))) + graph.add((extractor, doap['os'], Literal(platform))) + extractors[key] = extractor + return extractor + +def buildNewProject(graph, software): + key = software + try: + project = projects[key] + except KeyError: + project = getNewNode('project') + graph.add((project, RDF.type, doap["Project"])) + graph.add((project, doap['name'], Literal(software))) + projects[key] = project + return project + +def createMediaGraphs(rows): albums = {} artists = { @@ -46,16 +137,15 @@ counter = 1 for row in rows: - graph = Graph(identifier = URIRef(default_graph_uri)) - + graph = Graph(identifier = URIRef(graph_uri)) # Create all the relevant nodes (with the correct IDs) work = getNewNode('work') composition = getNewNode('composition') track = getNewNode('track') - signal = getNewNode('signal') record = getNewNode('record') performance = getNewNode('performance') + signal = Namespace(graph_uri+"/"+row['uid']) # If we don't have an artist url, make a foaf Agent instead. 
if row['artist']: @@ -95,16 +185,17 @@ if row['tracknum']: graph.add((track, mo['track_number'], Literal(row['tracknum']))) - # Album - try: - album = albums[row['album']] - except KeyError: - album = getNewNode('album') - graph.add((album, RDF.type, mo['Record'])) - graph.add((album, dc['title'], Literal(row['album'].strip()))) - graph.add((album, mo['release_type'], mo['album'])) - albums[row['album']] = album - graph.add((album, mo['track'], track)) + if row['album']: + # Album + try: + album = albums[row['album']] + except KeyError: + album = getNewNode('album') + graph.add((album, RDF.type, mo['Record'])) + graph.add((album, dc['title'], Literal(row['album'].strip()))) + graph.add((album, mo['release_type'], mo['album'])) + albums[row['album']] = album + graph.add((album, mo['track'], track)) # Signal graph.add((signal, RDF.type, mo['Signal'])) @@ -115,6 +206,11 @@ if row['isrc']: graph.add((signal, mo['isrc'], Literal(row['isrc'].strip()))) + # Add to the various databases + dbs = databases[catalogueID] + for db in dbs: + graph.add((db, audiodb["has-signal"], signal)) + # Record graph.add((record, RDF.type, mo['Record'])) graph.add((record, mo['publication_of'], signal)) @@ -126,10 +222,9 @@ if row['artist']: graph.add((performance, mo['performer'], artist)) graph.add((performance, mo['recorded_as'], signal)) - - #graph.close() - graph.serialize(format='xml',destination="output/"+catalogue.lower()+"_"+str(counter)+".rdf") + graph.close() + graph.serialize(format='xml',destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf") counter += 1 def getNewNode(type): @@ -142,11 +237,15 @@ try: namespace = namespaces[type] except KeyError: - namespaces[type] = Namespace(default_graph_uri+"/"+type+"/") + namespaces[type] = Namespace(graph_uri+"/"+type+"/") namespace = namespaces[type] node = namespace[str(count)] counters[type] += 1 return node -createGraph(loadCatalogue(catalogue)) +features = loadFeatures(catalogueID) +catalogue = 
loadCatalogue(catalogueID) + +createFeatureGraphs(features) +createMediaGraphs(catalogue)
--- a/examples/browser/ontology/audiodb.owl Wed Oct 14 12:12:39 2009 +0000 +++ b/examples/browser/ontology/audiodb.owl Thu Oct 15 14:24:36 2009 +0000 @@ -11,6 +11,7 @@ <!ENTITY off "http://purl.org/ontology/off/"> <!ENTITY foaf "http://xmlns.com/foaf/0.1/"> <!ENTITY doap "http://usefulinc.com/ns/doap#"> + <!ENTITY mo "http://purl.org/ontology/mo/"> ]> <rdf:RDF xmlns:rdf="&rdf;" @@ -21,7 +22,8 @@ xmlns:dc="&dc;" xmlns:dct="&dct;" xmlns:foaf="&foaf;" - xmlns:off="&off;"> + xmlns:off="&off;" + xmlns:mo="&mo;"> <owl:Ontology rdf:about="&base;"> <rdfs:label>AudioDB Ontology</rdfs:label> @@ -37,9 +39,15 @@ <rdfs:comment>Represents a collection of extracted features and information about their extraction.</rdfs:comment> <rdfs:subClassOf rdf:resource="&foaf;Document" /> </owl:Class> - - <owl:ObjectProperty rdf:ID="has-feature"> - <rdfs:label>Has Feature</rdfs:label> + + <owl:ObjectProperty rdf:ID="has-signal"> + <rdfs:label>Has Signal</rdfs:label> + <rdfs:domain rdf:resource="#Database" /> + <rdfs:range rdf:resource="&mo;Signal" /> + </owl:ObjectProperty> + + <owl:ObjectProperty rdf:ID="feature"> + <rdfs:label>Feature</rdfs:label> <rdfs:domain rdf:resource="#Database"/> <rdfs:range rdf:resource="#Feature" /> </owl:ObjectProperty> @@ -115,8 +123,8 @@ <rdfs:range rdf:resource="&xsd;double" /> </owl:ObjectProperty> - <owl:ObjectProperty rdf:ID="segmentation-type"> - <rdfs:label>segmentation type</rdfs:label> + <owl:ObjectProperty rdf:ID="segmentation"> + <rdfs:label>segmentation</rdfs:label> <rdfs:domain rdf:resource="#Feature"/> <rdfs:range rdf:resource="#Segmentation" /> </owl:ObjectProperty> @@ -150,7 +158,7 @@ <rdfs:subClassOf rdf:resource="#Window" /> </owl:Class> - <owl:Class rdf:ID="ChromogramFeature"> + <owl:Class rdf:ID="ChromagramFeature"> <rdfs:label>Chromogram Feature</rdfs:label> <rdfs:subClassOf rdf:resource="#Feature" /> </owl:Class>