"""Build an RDF description of audio features from plain-text tables.

Inputs, all read relative to the current working directory:

* ``pdfextract/names.txt``      -- one feature name per line.
* ``pdfextract/categories.txt`` -- one line per feature, six space-separated
  fields: temporal scale (I/X/G), perceptual flag (Y or anything else),
  psychoacoustic-model flag (Y or anything else), complexity (L/M/H),
  dimensions ('V' or an integer) and application-domain code.
* ``pdfextract/sig.txt``        -- one line per feature, space-separated keys
  into the computation-step table.
* ``pdfextract/filters.txt``, ``trans.txt``, ``aggr.txt`` -- computation-step
  tables; the first character of each line is the key and the text from
  column 2 onwards is the description.

The three per-feature files are matched up by line number.  The resulting
graph is serialized to the hard-coded path at the bottom of the script.
"""
import fnmatch
import os
import urllib2
from xml.dom.minidom import parseString

import rdflib
from rdflib import Graph, Namespace, RDF, RDFS, plugin, URIRef, Literal, OWL


def _read_lines(path):
    """Return the lines of *path* with surrounding whitespace stripped."""
    # Use a context manager so the file handle is closed deterministically.
    with open(path) as handle:
        return [line.strip() for line in handle]


names = _read_lines('pdfextract/names.txt')
cat = _read_lines('pdfextract/categories.txt')
sig = _read_lines('pdfextract/sig.txt')

graph = Graph()

# NOTE(review): the original script used `local` as the base URI for every
# subject and property but never defined it (NameError).  It is assumed here
# to be the same base as the `af` namespace -- confirm.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

af = Namespace(local)
graph.bind('af', af)

dc = Namespace('http://purl.org/dc/elements/1.1/')
graph.bind('dc', dc)

owl = Namespace('http://www.w3.org/2002/07/owl#')
graph.bind('owl', owl)

# Feature names that mark the start of a new domain, in the order in which
# they are expected to appear in names.txt.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features"
]

# Domain assigned to a marker feature and to every feature after it, until
# the next marker is reached.
domains = {
    "Zero Crossing Rate": 'temporal',
    "Linear Predictive Coding": 'frequency',
    "Mel-scale Frequency Cepstral Coefficients": 'cepstral',
    "Auditory Filter Bank Temporal Envelopes": 'modulation frequency',
    "Rate-scale-frequency Features": 'eigendomain',
    "Phase Space Features": 'phase space'
}

abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero crossing peak amplitudes": "ZCPA",
    "Line spectral frequencies": "LSF",
    "Short-time energy": "STE",
    "Amplitude descriptor": "AD",
    "Adaptive time frequency transform": "ATFT",
    "Daubechies Wavelet coefficient histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group delay function": "GDF",
    "Modified group delay function": "MGDF",
    "Spectral centroid": "SC",
    "Subband spectral flux": "SSF",
    "Perceptual linear prediction": "PLP"
}

# Decoding tables for the single-letter / short codes in categories.txt.
# Hoisted out of the loop because they never change.
_TEMPORAL_SCALES = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global'
}

_COMPLEXITIES = {
    'L': 'low',
    'M': 'medium',
    'H': 'high'
}

_APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': ''
}

domain = ""
domainIndex = 0
compdict = {}

for filename in ['filters', 'trans', 'aggr']:
    for line in _read_lines('pdfextract/' + filename + '.txt'):
        if not line:
            # A blank (e.g. trailing) line has no key character; skip it
            # instead of failing on line[0].
            continue
        compdict[line[0]] = line[2:]


def _add_literal(subject, prop, value):
    """Add the triple (subject, local + prop, Literal(value)) to the graph."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


for i, name in enumerate(names):
    compact_name = name.replace(' ', '').replace('-', '')
    feature = URIRef(local + compact_name)

    # Advance to the next domain when its marker feature is reached.  The
    # bounds check keeps the last domain in effect for features that follow
    # the final marker; without it order[domainIndex] raises IndexError.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    graph.add((feature, RDF.type, OWL.Class))
    _add_literal(feature, 'feature', compact_name)
    _add_literal(feature, 'domain', domain)

    word = cat[i].split(' ')

    _add_literal(feature, 'temporalscale', _TEMPORAL_SCALES[word[0]])
    _add_literal(feature, 'level',
                 'perceptual' if word[1] == 'Y' else 'physical')

    if word[2] == 'Y':
        _add_literal(feature, 'model', 'psychoacoustic')

    _add_literal(feature, 'complexity', _COMPLEXITIES[word[3]])

    # 'V' means the number of dimensions depends on a parameter; otherwise
    # the field is an integer count.
    if word[4] == 'V':
        dimensions = 'parameterized'
        dim = 'multi-dimensional'
    else:
        dimensions = word[4]
        dim = 'one-dimensional' if int(dimensions) == 1 else 'multi-dimensional'

    _add_literal(feature, 'dimensions', dimensions)
    _add_literal(feature, 'dimensionality', dim)

    app_domain = _APP_DOMAINS[word[5]]
    if app_domain != '':
        _add_literal(feature, 'appdomain', app_domain)

    # One 'computation' triple per step key listed for this feature.
    for key in sig[i].split(' '):
        _add_literal(feature, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add_literal(feature, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add_literal(feature, 'abbreviation', abbr[name])


# The format is given explicitly: the default differs between rdflib
# versions, and the .rdf target is RDF/XML.
graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf', format='xml')