#!/usr/bin/python
"""Export the `features` and `media` rows of one catalogue as RDF/XML.

Usage: <script> CATALOGUE_ID

The script connects to PostgreSQL, reads every row of the `features` and
`media` tables whose `catalogue` column matches CATALOGUE_ID, and writes

    output/<catalogue id, lower-cased>/features.rdf
    output/<catalogue id, lower-cased>/media_<n>.rdf   (one per media row)

NOTE: this file was recovered from version-control annotate output in which
indentation had been lost; the block nesting below was reconstructed from the
statement order and should be checked against the repository copy.
"""

import os
import sys
import psycopg2
import psycopg2.extras

from rdflib.Graph import ConjunctiveGraph as Graph
from rdflib import Namespace, Literal, URIRef, BNode, RDF

if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s CATALOGUE_ID\n" % sys.argv[0])
    sys.exit(1)

catalogueID = sys.argv[1]

foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
graph_uri = "http://omras2.gold.ac.uk/catalogue/" + catalogueID.lower()
audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
doap = Namespace("http://usefulinc.com/ns/doap#")

# Connection settings; replace the placeholders before running.
username = "USERNAME"
host = "HOSTNAME"
database = "DATABASE"

try:
    conn = psycopg2.connect(
        "dbname='" + database + "' user='" + username + "' host='" + host + "'")
except psycopg2.Error:
    # sys.exc_info() rather than "except ... as e" keeps this runnable on the
    # old Python 2 interpreters the rest of the file targets.
    sys.stderr.write(
        "Unable to connect to the database: %s\n" % (sys.exc_info()[1],))
    sys.exit(1)

cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Module-level registries shared by the functions below.
counters = {}    # node kind -> next numeric id handed out by getNewNode
namespaces = {}  # node kind -> Namespace the ids are appended to
extractors = {}  # (software, version, platform) -> doap:Version node
projects = {}    # software name -> doap:Project node
databases = {}   # catalogue id -> list of audiodb:Database nodes

# Values of the features.feature / segtype / windowtype columns and the
# audiodb ontology class each one maps to.  Unknown values get no rdf:type.
FEATURE_TYPES = {
    "cqt": "CQTFeature",
    "chr": "ChromagramFeature",
    "mfcc": "MFCCFeature",
}
SEGMENTATION_TYPES = {
    "frames": "FrameSegmentation",
    "beats": "BeatSegmentation",
    "segs": "StructuralSegmentation",
}
WINDOW_TYPES = {
    "hamming": "HammingWindow",
}

# (audiodb predicate, features column) pairs copied verbatim onto the
# feature node as literals.
FEATURE_PROPERTIES = (
    ("dimension", "dim"),
    ("hop-size", "hopsize"),
    ("window-length", "winlen"),
    ("nfft", "nfft"),
    ("segn", "segn"),
    ("channel", "channel"),
    ("loedge", "loedge"),
    ("hiedge", "hiedge"),
    ("octaveres", "octaveres"),
)


def _ensureOutputDir():
    """Create output/<catalogue id> if it is missing and return its path."""
    path = "output/" + catalogueID.lower()
    if not os.path.isdir(path):
        os.makedirs(path)
    return path


def loadFeatures(catalogueID):
    """Return all rows of `features` whose catalogue matches catalogueID.

    The value is passed as a bound parameter rather than spliced into the
    SQL text, so quotes in the command-line argument cannot alter the query.
    LIKE is kept, so % and _ in catalogueID still act as wildcards.
    """
    cursor.execute(
        "SELECT * from features WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()


def loadCatalogue(catalogueID):
    """Return all rows of `media` whose catalogue matches catalogueID.

    Uses a bound parameter for the same reason as loadFeatures.
    """
    cursor.execute(
        "SELECT * from media WHERE catalogue LIKE %s", (catalogueID,))
    return cursor.fetchall()


def _addMappedType(graph, node, typeMap, value):
    """Add an rdf:type triple for node if value has an entry in typeMap."""
    if value in typeMap:
        graph.add((node, RDF.type, audiodb[typeMap[value]]))


def createFeatureGraphs(rows):
    """Write output/<catalogue>/features.rdf describing every feature row.

    For each row a database, feature, segmentation and window node is
    minted; the database nodes are also recorded in databases[catalogueID]
    so that createMediaGraphs can attach signals to them.
    """
    graph = Graph(identifier=URIRef(graph_uri))
    databases[catalogueID] = []

    for row in rows:
        # Create all the relevant nodes (with the correct IDs).  The order
        # of these calls determines the numeric ids, so keep it stable.
        dbNode = getNewNode('database')
        databases[catalogueID].append(dbNode)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')

        _addMappedType(graph, feature, FEATURE_TYPES, row['feature'])
        _addMappedType(graph, segmentation, SEGMENTATION_TYPES, row['segtype'])
        _addMappedType(graph, window, WINDOW_TYPES, row['windowtype'])

        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))

        for predicate, column in FEATURE_PROPERTIES:
            graph.add((feature, audiodb[predicate], Literal(row[column])))

        version = buildNewExtractor(
            graph, row['software'], row['version'], row['platform'])

        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))

        graph.add((dbNode, RDF.type, audiodb["Database"]))
        graph.add((dbNode, audiodb["feature"], feature))
        graph.add((dbNode, audiodb["extractor"], version))

    graph.serialize(
        format='xml', destination=_ensureOutputDir() + "/" + "features.rdf")


def buildNewExtractor(graph, software, version, platform):
    """Return the doap:Version node for (software, version, platform).

    The node is created and described in graph the first time a given
    combination is seen; later calls return the cached node unchanged.
    """
    # A tuple key cannot collide the way "a_b" + "_" + "c" vs "a" + "_" +
    # "b_c" can, and does not fail when a column is NULL.
    key = (software, version, platform)
    try:
        extractor = extractors[key]
    except KeyError:
        extractor = getNewNode('extractor')
        graph.add((extractor, RDF.type, doap["Version"]))
        graph.add((extractor, doap['version'], Literal(version)))
        graph.add((extractor, doap['name'], Literal(software)))
        graph.add((extractor, doap['os'], Literal(platform)))
        extractors[key] = extractor
    return extractor


def buildNewProject(graph, software):
    """Return the doap:Project node for software, creating it on first use."""
    try:
        project = projects[software]
    except KeyError:
        project = getNewNode('project')
        graph.add((project, RDF.type, doap["Project"]))
        graph.add((project, doap['name'], Literal(software)))
        projects[software] = project
    return project


def _getAgent(graph, agents, name):
    """Return the node for the person called name.

    Known names come from agents; an unknown name gets a fresh foaf:Agent
    node which is described in graph and remembered in agents.
    """
    try:
        return agents[name]
    except KeyError:
        agent = getNewNode('artist')
        graph.add((agent, RDF.type, foaf['Agent']))
        graph.add((agent, foaf['name'], Literal(name.strip())))
        agents[name] = agent
        return agent


def createMediaGraphs(rows):
    """Write one output/<catalogue>/media_<n>.rdf file per media row.

    Each file holds the work, composition, track, record, performance and
    signal triples for that row, plus a has-signal link from every database
    node recorded by createFeatureGraphs for this catalogue.

    NOTE(review): artist and album nodes are cached across rows while a new
    graph is started for every row, so the type/name/title triples of a
    cached node appear only in the file of the row that first used it.
    Left as in the original; confirm this is intended.
    """
    albums = {}

    # Artists for which a MusicBrainz URI is known; others get a local node.
    artists = {
        'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
        'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
        'Miles Davis': mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a']}

    outputDir = _ensureOutputDir()
    # Empty when createFeatureGraphs has not been run for this catalogue.
    dbs = databases.get(catalogueID, [])

    counter = 1
    for row in rows:
        graph = Graph(identifier=URIRef(graph_uri))

        # Create all the relevant nodes (with the correct IDs).
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        record = getNewNode('record')
        performance = getNewNode('performance')
        signal = URIRef(graph_uri + "/" + row['uid'])

        # Reset per row: previously a row without an artist either raised
        # NameError (first row) or silently reused the previous row's artist.
        artist = None
        composer = None

        # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            artist = _getAgent(graph, artists, row['artist'])

        if row['composer']:
            composer = _getAgent(graph, artists, row['composer'])
        else:
            # No separate composer recorded: credit the artist (may be None).
            composer = artist

        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))

        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))

        # Track
        graph.add((track, RDF.type, mo['Track']))
        if artist:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))

        if row['album']:
            # Album
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))

        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))

        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))

        # Add to the various databases
        for db in dbs:
            graph.add((db, audiodb["has-signal"], signal))

        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))

        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if artist:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))

        # Serialise before closing so the store is still open when read.
        graph.serialize(
            format='xml',
            destination=outputDir + "/media_" + str(counter) + ".rdf")
        graph.close()
        counter += 1


def getNewNode(type):
    """Return the next unused URI for a node of the given kind.

    URIs have the form <graph_uri>/<type>/<n>; n starts at 1 and goes up by
    one on every call made with the same type.
    """
    if type not in namespaces:
        namespaces[type] = Namespace(graph_uri + "/" + type + "/")

    count = counters.get(type, 1)
    counters[type] = count + 1
    return namespaces[type][str(count)]


try:
    features = loadFeatures(catalogueID)
    catalogue = loadCatalogue(catalogueID)

    # Features first: createMediaGraphs links signals to the database nodes
    # that createFeatureGraphs records.
    createFeatureGraphs(features)
    createMediaGraphs(catalogue)
finally:
    cursor.close()
    conn.close()