audiodb: examples/browser/export/cat2rdf.py @ 663:bcc7a6ddb2c8
Better treatment of refine.hopsize
Only adapt the query-loop hopping if ADB_REFINE_HOP_SIZE is actually set in the
refine flags, rather than blithely taking the value of refine.hopsize. This
significantly reduces the chances of inadvertently causing an infinite loop.

Adjust the tests to remove the 23 now-useless instances of "refine.hopsize = 1;"
Closes trac ticket #16
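
For illustration, the guard described above amounts to the following check (a minimal Python sketch only; the actual change lives in the library's C query loop, and `effective_hop`, `flags` and `hop_size_flag` are stand-in names for the refine flags word, the ADB_REFINE_HOP_SIZE bit and refine.hopsize):

    def effective_hop(flags, hop_size_flag, hopsize):
        # Honour the requested hop size only when its refine flag is set;
        # otherwise fall back to a hop of 1 so the query loop always advances.
        if flags & hop_size_flag:
            return hopsize
        return 1
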
| field | value |
|---|---|
| author | mas01cr |
| date | Thu, 28 Jan 2010 10:23:42 +0000 |
| parents | 1a251dd217c6 |
| children | |
#!/usr/bin/python

import sys

import psycopg2
import psycopg2.extras
from rdflib.Graph import ConjunctiveGraph as Graph
from rdflib import Namespace, Literal, URIRef, BNode, RDF

catalogueID = sys.argv[1]

# Namespaces used in the exported RDF.
foaf = Namespace("http://xmlns.com/foaf/0.1/")
mo = Namespace("http://purl.org/ontology/mo/")
mb_artist = Namespace("http://dbtune.org/musicbrainz/resource/artist/")
dc = Namespace("http://purl.org/dc/elements/1.1/")
graph_uri = "http://omras2.gold.ac.uk/catalogue/"+catalogueID.lower()
audiodb = Namespace("http://omras2.gold.ac.uk/ontology/audiodb#")
doap = Namespace("http://usefulinc.com/ns/doap#")

# Database connection settings (placeholders).
username = "USERNAME"
host = "HOSTNAME"
database = "DATABASE"

try:
    conn = psycopg2.connect("dbname='"+database+"' user='"+username+"' host='"+host+"'")
except psycopg2.Error:
    # Nothing below can work without a connection, so bail out early.
    print "Unable to connect to the database"
    sys.exit(1)

cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

# Per-type URI counters and caches for nodes shared across rows.
counters = {}
namespaces = {}
extractors = {}
projects = {}
databases = {}

def loadFeatures(catalogueID):
    cursor.execute("""SELECT * from features WHERE catalogue LIKE '"""+catalogueID+"""'""")
    rows = cursor.fetchall()
    return rows

def loadCatalogue(catalogueID):
    cursor.execute("""SELECT * from media WHERE catalogue LIKE '"""+catalogueID+"""'""")
    rows = cursor.fetchall()
    return rows

def createFeatureGraphs(rows):
    albums = {}
    graph = Graph(identifier = URIRef(graph_uri))
    counter = 1
    databases[catalogueID] = []
    for row in rows:
        # Create all the relevant nodes (with the correct IDs)
        database = getNewNode('database')
        databases[catalogueID].append(database)
        feature = getNewNode('feature')
        segmentation = getNewNode('segmentation')
        window = getNewNode('window')
        if row['feature'] == "cqt":
            graph.add((feature, RDF.type, audiodb['CQTFeature']))
        elif row['feature'] == "chr":
            graph.add((feature, RDF.type, audiodb['ChromagramFeature']))
        elif row['feature'] == "mfcc":
            graph.add((feature, RDF.type, audiodb['MFCCFeature']))
        if row['segtype'] == "frames":
            graph.add((segmentation, RDF.type, audiodb['FrameSegmentation']))
        elif row['segtype'] == "beats":
            graph.add((segmentation, RDF.type, audiodb['BeatSegmentation']))
        elif row['segtype'] == "segs":
            graph.add((segmentation, RDF.type, audiodb['StructuralSegmentation']))
        if row['windowtype'] == "hamming":
            graph.add((window, RDF.type, audiodb['HammingWindow']))
        graph.add((feature, audiodb["window"], window))
        graph.add((feature, audiodb["segmentation"], segmentation))
        graph.add((feature, audiodb["dimension"], Literal(row['dim'])))
        graph.add((feature, audiodb["hop-size"], Literal(row['hopsize'])))
        graph.add((feature, audiodb["window-length"], Literal(row['winlen'])))
        graph.add((feature, audiodb["nfft"], Literal(row['nfft'])))
        graph.add((feature, audiodb["segn"], Literal(row['segn'])))
        graph.add((feature, audiodb["channel"], Literal(row['channel'])))
        graph.add((feature, audiodb["loedge"], Literal(row['loedge'])))
        graph.add((feature, audiodb["hiedge"], Literal(row['hiedge'])))
        graph.add((feature, audiodb["octaveres"], Literal(row['octaveres'])))
        version = buildNewExtractor(graph, row['software'], row['version'], row['platform'])
        project = buildNewProject(graph, row['software'])
        graph.add((project, doap['release'], version))
        graph.add((database, RDF.type, audiodb["Database"]))
        graph.add((database, audiodb["feature"], feature))
        graph.add((database, audiodb["extractor"], version))
        counter += 1
    graph.serialize(format='xml', destination="output/"+catalogueID.lower()+"/"+"features.rdf")

def buildNewExtractor(graph, software, version, platform):
    key = software+"_"+version+"_"+platform
    try:
        extractor = extractors[key]
    except KeyError:
        extractor = getNewNode('extractor')
        graph.add((extractor, RDF.type, doap["Version"]))
        graph.add((extractor, doap['version'], Literal(version)))
        graph.add((extractor, doap['name'], Literal(software)))
        graph.add((extractor, doap['os'], Literal(platform)))
        extractors[key] = extractor
    return extractor

def buildNewProject(graph, software):
    key = software
    try:
        project = projects[key]
    except KeyError:
        project = getNewNode('project')
        graph.add((project, RDF.type, doap["Project"]))
        graph.add((project, doap['name'], Literal(software)))
        projects[key] = project
    return project

def createMediaGraphs(rows):
    albums = {}
    artists = { 'Madonna': mb_artist['79239441-bfd5-4981-a70c-55c3f15c1287'],
                'John Coltrane': mb_artist['b625448e-bf4a-41c3-a421-72ad46cdb831'],
                'Miles Davis': mb_artist['561d854a-6a28-4aa7-8c99-323e6ce46c2a'] }
    counter = 1
    for row in rows:
        graph = Graph(identifier = URIRef(graph_uri))
        # Create all the relevant nodes (with the correct IDs)
        work = getNewNode('work')
        composition = getNewNode('composition')
        track = getNewNode('track')
        record = getNewNode('record')
        performance = getNewNode('performance')
        signal = Namespace(graph_uri+"/"+row['uid'])
        # If we don't have an artist url, make a foaf Agent instead.
        if row['artist']:
            try:
                artist = artists[row['artist']]
            except KeyError:
                artist = getNewNode('artist')
                graph.add((artist, RDF.type, foaf['Agent']))
                graph.add((artist, foaf['name'], Literal(row['artist'].strip())))
                artists[row['artist']] = artist
        if row['composer']:
            try:
                composer = artists[row['composer']]
            except KeyError:
                composer = getNewNode('artist')
                graph.add((composer, RDF.type, foaf['Agent']))
                graph.add((composer, foaf['name'], Literal(row['composer'].strip())))
                artists[row['composer']] = composer
        else:
            composer = artist
        # Work
        graph.add((work, RDF.type, mo['MusicalWork']))
        # Composition
        graph.add((composition, RDF.type, mo['Composition']))
        if composer:
            graph.add((composition, mo['composer'], composer))
        graph.add((composition, mo['produced_work'], work))
        # Track
        graph.add((track, RDF.type, mo['Track']))
        if row['artist']:
            graph.add((track, foaf['maker'], artist))
        if row['tracknum']:
            graph.add((track, mo['track_number'], Literal(row['tracknum'])))
        if row['album']:
            # Album
            try:
                album = albums[row['album']]
            except KeyError:
                album = getNewNode('album')
                graph.add((album, RDF.type, mo['Record']))
                graph.add((album, dc['title'], Literal(row['album'].strip())))
                graph.add((album, mo['release_type'], mo['album']))
                albums[row['album']] = album
            graph.add((album, mo['track'], track))
        # Signal
        graph.add((signal, RDF.type, mo['Signal']))
        graph.add((signal, mo['published_as'], record))
        if row['track']:
            graph.add((signal, dc['title'], Literal(row['track'].strip())))
        if row['isrc']:
            graph.add((signal, mo['isrc'], Literal(row['isrc'].strip())))
        # Add to the various databases
        dbs = databases[catalogueID]
        for db in dbs:
            graph.add((db, audiodb["has-signal"], signal))
        # Record
        graph.add((record, RDF.type, mo['Record']))
        graph.add((record, mo['publication_of'], signal))
        graph.add((record, mo['track'], track))
        # Performance
        graph.add((performance, RDF.type, mo['Performance']))
        graph.add((performance, mo['performance_of'], work))
        if row['artist']:
            graph.add((performance, mo['performer'], artist))
        graph.add((performance, mo['recorded_as'], signal))
        graph.close()
        graph.serialize(format='xml', destination="output/"+catalogueID.lower()+"/media_"+str(counter)+".rdf")
        counter += 1

def getNewNode(type):
    try:
        count = counters[type]
    except KeyError:
        counters[type] = 1
        count = counters[type]
    try:
        namespace = namespaces[type]
    except KeyError:
        namespaces[type] = Namespace(graph_uri+"/"+type+"/")
        namespace = namespaces[type]
    node = namespace[str(count)]
    counters[type] += 1
    return node

features = loadFeatures(catalogueID)
catalogue = loadCatalogue(catalogueID)
createFeatureGraphs(features)
createMediaGraphs(catalogue)
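
As a rough usage note: the script takes the catalogue identifier as its only command-line argument (`sys.argv[1]`) and serialises RDF/XML into an existing `output/<catalogue>/` directory, which it does not create itself. A minimal check of the result might look like the sketch below, assuming a hypothetical catalogue ID `gold` and the same rdflib version the script imports:

    from rdflib.Graph import ConjunctiveGraph as Graph

    # Parse the exported feature graph back in and count its triples.
    g = Graph()
    g.parse("output/gold/features.rdf", format="xml")
    print len(g), "triples in the exported feature graph"
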