Mercurial > hg > audiodb
view examples/browser/export/cat2rdf.py @ 770:c54bc2ffbf92 tip
update tags
author | convert-repo |
---|---|
date | Fri, 16 Dec 2011 11:34:01 +0000 |
parents | 1a251dd217c6 |
children |
line wrap: on
line source
#!/usr/bin/python
"""Export one audioDB catalogue from PostgreSQL as RDF/XML files.

Usage: cat2rdf.py CATALOGUE_ID

Rows of the ``features`` table are written to
``output/<catalogue>/features.rdf``; every row of the ``media`` table is
written to its own ``output/<catalogue>/media_<n>.rdf`` file.
"""

import os
import sys

import psycopg2
import psycopg2.extras
from rdflib.Graph import ConjunctiveGraph as Graph
from rdflib import Namespace, Literal, URIRef, BNode, RDF

if len(sys.argv) < 2:
    # Fail with a usage message rather than a bare IndexError.
    sys.stderr.write("Usage: %s CATALOGUE_ID\n" % sys.argv[0])
    sys.exit(1)

catalogueID = sys.argv[1]

foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
graph_uri = "http://omras2.gold.ac.uk/catalogue/" + catalogueID.lower()
audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
doap = Namespace("http://usefulinc.com/ns/doap#")

# Connection settings; placeholders to be filled in before running.
username = "USERNAME"
host = "HOSTNAME"
database = "DATABASE"

try:
    conn = psycopg2.connect(
        "dbname='" + database + "' user='" + username + "' host='" + host + "'")
except psycopg2.Error as err:
    # Nothing below can work without a connection, so stop here instead of
    # continuing into a NameError on `conn`.
    sys.stderr.write("Unable to connect to the database: %s\n" % err)
    sys.exit(1)

cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Module-level caches shared by the builder functions below.
counters = {}     # node type -> next numeric id handed out by getNewNode
namespaces = {}   # node type -> Namespace the ids are minted in
extractors = {}   # (software, version, platform) -> doap:Version node
projects = {}     # software name -> doap:Project node
databases = {}    # catalogue id -> list of audiodb:Database nodes

# Values of the `feature` column mapped to their audiodb ontology class.
FEATURE_CLASSES = {
    "cqt": "CQTFeature",
    "chr": "ChromagramFeature",
    "mfcc": "MFCCFeature",
}

# Values of the `segtype` column mapped to their audiodb ontology class.
SEGMENTATION_CLASSES = {
    "frames": "FrameSegmentation",
    "beats": "BeatSegmentation",
    "segs": "StructuralSegmentation",
}

# (audiodb predicate, features-table column) pairs copied onto each feature.
FEATURE_PROPERTIES = (
    ("dimension", "dim"),
    ("hop-size", "hopsize"),
    ("window-length", "winlen"),
    ("nfft", "nfft"),
    ("segn", "segn"),
    ("channel", "channel"),
    ("loedge", "loedge"),
    ("hiedge", "hiedge"),
    ("octaveres", "octaveres"),
)


def _outputPath(filename):
    """Return the path of `filename` in this catalogue's output directory.

    The directory ``output/<catalogue id, lower-cased>`` is created when it
    does not exist yet, so serialization does not fail on a fresh checkout.
    """
    directory = os.path.join("output", catalogueID.lower())
    if not os.path.isdir(directory):
        os.makedirs(directory)
    return os.path.join(directory, filename)


def loadFeatures(catalogueID):
    """Return all rows of `features` whose catalogue matches `catalogueID`.

    The id is passed as a bound parameter; it comes from the command line
    and must not be spliced into the SQL text.
    """
    cursor.execute(
        "SELECT * from features WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()


def loadCatalogue(catalogueID):
    """Return all rows of `media` whose catalogue matches `catalogueID`.

    The id is passed as a bound parameter; it comes from the command line
    and must not be spliced into the SQL text.
    """
    cursor.execute(
        "SELECT * from media WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()


def createFeatureGraphs(rows):
    """Describe every feature row in one graph and write features.rdf.

    For each row a database, feature, segmentation and window node is
    minted; the database node is also recorded in `databases[catalogueID]`
    so that createMediaGraphs can attach signals to it.
    """
    graph = Graph(identifier=URIRef(graph_uri))
    databases[catalogueID] = []

    for row in rows:
        # Create all the relevant nodes (with the correct IDs)
        database_node = getNewNode('database')
        databases[catalogueID].append(database_node)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')

        # Unknown feature / segmentation kinds simply get no rdf:type.
        feature_class = FEATURE_CLASSES.get(row['feature'])
        if feature_class is not None:
            graph.add((feature, RDF.type, audiodb[feature_class]))

        segmentation_class = SEGMENTATION_CLASSES.get(row['segtype'])
        if segmentation_class is not None:
            graph.add((segmentation, RDF.type, audiodb[segmentation_class]))

        if row['windowtype'] == "hamming":
            graph.add((window, RDF.type, audiodb['HammingWindow']))

        # NOTE(review): assumes the window link is added for every row, not
        # only for hamming windows -- confirm against the upstream layout.
        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))

        for predicate, column in FEATURE_PROPERTIES:
            graph.add((feature, audiodb[predicate], Literal(row[column])))

        version = buildNewExtractor(
            graph, row['software'], row['version'], row['platform'])
        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))

        graph.add((database_node, RDF.type, audiodb["Database"]))
        graph.add((database_node, audiodb["feature"], feature))
        graph.add((database_node, audiodb["extractor"], version))

    graph.serialize(format='xml', destination=_outputPath("features.rdf"))


def buildNewExtractor(graph, software, version, platform):
    """Return the doap:Version node for a software/version/platform triple.

    The node is created and described in `graph` on first use and cached in
    `extractors` afterwards. A tuple key is used so that non-string column
    values cannot break the lookup and distinct triples cannot collide.
    """
    key = (software, version, platform)
    try:
        return extractors[key]
    except KeyError:
        extractor = getNewNode('extractor')
        graph.add((extractor, RDF.type, doap["Version"]))
        graph.add((extractor, doap['version'], Literal(version)))
        graph.add((extractor, doap['name'], Literal(software)))
        graph.add((extractor, doap['os'], Literal(platform)))
        extractors[key] = extractor
        return extractor


def buildNewProject(graph, software):
    """Return the doap:Project node for `software`.

    The node is created and described in `graph` on first use and cached in
    `projects` afterwards.
    """
    try:
        return projects[software]
    except KeyError:
        project = getNewNode('project')
        graph.add((project, RDF.type, doap["Project"]))
        graph.add((project, doap['name'], Literal(software)))
        projects[software] = project
        return project


def _agentNode(graph, agents, name):
    """Return the node for the agent called `name`.

    Names already present in `agents` reuse their node. Otherwise a new
    foaf:Agent is minted, described in `graph` and remembered in `agents`.
    """
    try:
        return agents[name]
    except KeyError:
        agent = getNewNode('artist')
        graph.add((agent, RDF.type, foaf['Agent']))
        graph.add((agent, foaf['name'], Literal(name.strip())))
        agents[name] = agent
        return agent


def createMediaGraphs(rows):
    """Write one media_<n>.rdf file per media row.

    Each row gets its own graph holding the work, composition, track,
    record, performance and signal nodes. Album and agent nodes are cached
    across rows, so their descriptive triples only appear in the file of
    the first row that mentions them.
    """
    albums = {}
    # Artists with a known MusicBrainz URI; others become foaf:Agent nodes.
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis': mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a'],
    }

    for counter, row in enumerate(rows, 1):
        graph = Graph(identifier=URIRef(graph_uri))

        # Create all the relevant nodes (with the correct IDs)
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        record = getNewNode('record')
        performance = getNewNode('performance')
        signal = Namespace(graph_uri + "/" + row['uid'])

        # Reset per row: without this a row lacking an artist would either
        # raise NameError or silently inherit the previous row's artist.
        artist = None

        # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            artist = _agentNode(graph, artists, row['artist'])

        # NOTE(review): assumes the composer lookup is independent of the
        # artist check (not nested inside it) -- confirm against upstream.
        if row['composer']:
            composer = _agentNode(graph, artists, row['composer'])
        else:
            # No composer recorded: fall back to the artist (may be None).
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer is not None:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if artist is not None:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        # Album
        if row['album']:
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))
        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Add to the various databases (empty when createFeatureGraphs has
        # not been run for this catalogue).
        for db in databases.get(catalogueID, []):
            graph.add((db, audiodb["has-signal"], signal))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if artist is not None:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        # Serialize while the graph is still open, then release it.
        graph.serialize(
            format='xml',
            destination=_outputPath("media_" + str(counter) + ".rdf"))
        graph.close()


def getNewNode(type):
    """Return a fresh node ``<graph_uri>/<type>/<n>`` for the given type.

    `n` starts at 1 for each type and increases by one on every call; the
    per-type counter and namespace are kept in `counters` and `namespaces`.
    """
    count = counters.get(type, 1)
    if type not in namespaces:
        namespaces[type] = Namespace(graph_uri + "/" + type + "/")
    node = namespaces[type][str(count)]
    counters[type] = count + 1
    return node


try:
    features = loadFeatures(catalogueID)
    catalogue = loadCatalogue(catalogueID)

    # Features first: createMediaGraphs links signals to the database nodes
    # that createFeatureGraphs registers.
    createFeatureGraphs(features)
    createMediaGraphs(catalogue)
finally:
    cursor.close()
    conn.close()