diff pdfextract/writeRDFs.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pdfextract/writeRDFs.py	Mon Feb 25 14:47:41 2013 +0000
@@ -0,0 +1,272 @@
+import rdflib, os, fnmatch, urllib2
+from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
+from xml.dom.minidom import parseString
+
+############# Vamp ###############
+
+vampdir = '/Users/alo/Library/Audio/Plug-Ins/Vamp/'
+
+source = Graph()
+
+graph = Graph()
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+graph.bind('local', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+
+for name in os.listdir(vampdir):
+    if fnmatch.fnmatch(name, '*.n3'):
+        
+        print (vampdir + name)
+        
+        source.parse(vampdir + name, format='n3')
+        
+        for su, pr in source.subject_predicates(URIRef('http://purl.org/ontology/vamp/Plugin')):
+                        
+            for name in source.objects(su, URIRef('http://purl.org/ontology/vamp/name')):
+                id = name.replace(' ', '').replace('Marsyas-BatchFeatureExtract-', '')
+                feature = name
+                graph.add((
+                    URIRef(id), 
+                    URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+                    URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
+                ))
+                graph.add((
+                    URIRef(id), 
+                    URIRef(local+'feature'), 
+                    Literal(feature)
+                ))
+            for domain in source.objects(su, URIRef('http://purl.org/ontology/vamp/input_domain')):
+                dom = domain.split('/')[-1].replace('Domain', '')
+                dom = dom.lower().replace('Time', 'temporal')
+                graph.add((
+                    URIRef(id), 
+                    URIRef(local+'domain'), 
+                    Literal(dom)
+                ))
+            for desc in source.objects(su, URIRef('http://purl.org/dc/elements/1.1/description')):
+                description = " ".join(desc.split())
+                graph.add((
+                    URIRef(id), 
+                    URIRef('http://purl.org/dc/elements/1.1/description'), 
+                    Literal(description)
+                ))  
+            for maker in source.objects(su, URIRef('http://xmlns.com/foaf/0.1/maker')):
+                for mname in source.objects(maker, URIRef('http://xmlns.com/foaf/0.1/name')):
+                    makername = mname
+                    graph.add((
+                        URIRef(id), 
+                        URIRef(local+'source'), 
+                        Literal(makername)
+                    ))
+                
+            count=sum(1 for _ in source.objects(su, URIRef('http://purl.org/ontology/vamp/output')))
+            
+            if count == 1:
+               for it in source.objects(su, URIRef('http://purl.org/ontology/vamp/output')): 
+                    for output in source.objects(it, URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')):
+                        out = output.split('/')[-1].replace('Output', '')
+                        if out.find('Sparse') >= 0 or out.find('Dense') >= 0:
+                            graph.add((
+                                URIRef(id), 
+                                URIRef(local+'output'), 
+                                Literal(out)
+                            ))
+                 
+            else:
+                for it in source.objects(su, URIRef('http://purl.org/ontology/vamp/output')):
+                    for name in source.objects(it, URIRef('http://purl.org/dc/elements/1.1/title')):
+                        if name != feature:
+                            subid = name.replace(' ', '')
+                            graph.add((
+                                URIRef(subid), 
+                                URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+                                URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
+                            ))
+                            graph.add((
+                                URIRef(subid), 
+                                URIRef(local+'feature'), 
+                                Literal(name + " (" + feature + ")")
+                            ))
+                            graph.add((
+                                URIRef(subid), 
+                                URIRef(local+'domain'), 
+                                Literal(dom)
+                            ))
+                            graph.add((
+                                URIRef(subid), 
+                                URIRef(local+'source'), 
+                                Literal(makername)
+                            ))
+                            for output in source.objects(it, URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')):
+                                out = output.split('/')[-1].replace('Output', '')
+                                if out.find('Sparse') >= 0 or out.find('Dense') >= 0:
+                                    graph.add((
+                                        URIRef(subid), 
+                                        URIRef(local+'output'), 
+                                        Literal(out)
+                                    ))
+
+graph.serialize('/Users/alo/MusicOntology/features/rdf/af-Vamp.rdf')
+
+############# Marsyas ###############
+mdir = '/Users/alo/Development/MIR/marsyas-0.4.7/src/marsyas/'
+
+madict = {}
+
+graph = Graph()
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+graph.bind('local', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+
+for name in os.listdir(mdir):
+    if fnmatch.fnmatch(name, '*.h'):
+        code = [line.strip() for line in open(mdir + name)]
+        found = False
+        for line in code:
+            if line.find('\ingroup Analysis') >= 0:
+                found = True
+                break
+        
+        if found:
+            i = 0
+            cl = ''
+            for line in code:
+                if line.find('\class') >= 0:
+                    cl = line.split(' ')[-1]
+                    madict[cl] = {'brief': code[i+2][7:]} 
+                    if code[i+3] != '':
+                        madict[cl]['brief'] += code[i+3]
+                        
+                    break
+                    
+                i += 1
+
+            graph.add((
+                URIRef(cl), 
+                URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+                URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
+            ))
+
+            graph.add((
+                URIRef(cl), 
+                URIRef('http://purl.org/dc/elements/1.1/description'), 
+                Literal(madict[cl]['brief'])
+            ))
+            
+graph.serialize('/Users/alo/MusicOntology/features/rdf/af-Marsyas.rdf')
+
+############# jMIR ###############
+jdir = '/Users/alo/Development/MIR/jAudio/jAudio/'
+jsrc = '/Users/alo/Development/MIR/jAudio/jAudio/src/jAudioFeatureExtractor/AudioFeatures/'
+
+file = urllib2.urlopen('file://' + jdir + 'features.xml')
+data = file.read()
+file.close()
+
+dom = parseString(data)
+jmir = dom.getElementsByTagName('feature')
+
+graph = Graph()
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+graph.bind('local', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+
+for nodes in jmir:
+    jname = nodes.childNodes[1].firstChild.nodeValue.split('.')[-1]    
+    graph.add((
+        URIRef(jname), 
+        URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+        URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
+    ))
+    file = open(jsrc + jname + '.java')
+    code = file.read()
+    searchstr = 'String name ='
+    start = code.find(searchstr) + len(searchstr)
+    start = code.find('"', start) + 1
+    end = code.find('"', start)
+    name = code[start:end]
+        
+    if name > "":
+        graph.add((
+            URIRef(jname), 
+            URIRef(local+'name'), 
+            Literal(name)
+        ))
+
+    searchstr = 'String description'
+    start = code.find(searchstr) + len(searchstr)
+    start = code.find('"', start) + 1
+    end = code.find('";', start)
+    desc = code[start:end]
+    desc = desc.replace("
", "").replace("\t", "").replace('\n', '').replace('+', '').replace('"', '').replace(';', '').replace('//\n', '')
+    desc = " ".join(desc.split())
+    
+    if desc > "":
+        graph.add((
+            URIRef(jname), 
+            URIRef('http://purl.org/dc/elements/1.1/description'), 
+            Literal(desc)
+        ))
+        
+
+graph.serialize('/Users/alo/MusicOntology/features/rdf/af-jMIR.rdf')
+
+
+############# yaafe ###############
+
+graph = Graph()
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+graph.bind('local', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+
+path = "/Users/alo/Development/MIR/yaafe-v0.64/src_python/yaafefeatures.py"
+
+lines = [line.strip() for line in open(path)]
+
+count = 0
+
+for ln in lines:
+    if ln.find('class ') >= 0:
+        yname = ln[6:ln.find('(AudioFeature)')]
+        desc = lines[count+2]
+        desc = desc.replace("`", "").replace("<", "").replace(">", "")
+        desc = " ".join(desc.split())
+        
+        graph.add((
+            URIRef(yname), 
+            URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+            URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
+        ))
+        
+        graph.add((
+            URIRef(yname), 
+            URIRef('http://purl.org/dc/elements/1.1/description'), 
+            Literal(desc)
+        ))
+        
+    count += 1
+        
+graph.serialize('/Users/alo/MusicOntology/features/rdf/af-Yaafe.rdf')
+
+############# libXtract ###############
+graph = Graph()
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+graph.bind('local', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+
+path = '/Users/alo/Development/MIR/LibXtract/xtract/libxtract.h'
+
+lines = [line.strip() for line in open(path)]
+
+xtract = lines[(lines.index('enum xtract_features_ {')+1):(lines.index('XTRACT_WINDOWED')-1)]
+
+for ln in xtract:
+    xname = ln[(ln.index('_')+1):-1].replace("_", " ").lower().capitalize()
+
+    graph.add((
+        URIRef(xname), 
+        URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+        URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
+    ))
+
+graph.serialize('/Users/alo/MusicOntology/features/rdf/af-libXtract.rdf')