diff pdfextract/parsejAudio.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pdfextract/parsejAudio.py	Mon Feb 25 14:47:41 2013 +0000
@@ -0,0 +1,67 @@
+import rdflib, os, fnmatch, urllib2
+from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal
+from xml.dom.minidom import parseString
+
+# NOTE(review): this executes a file at an absolute, machine-specific path.
+# The matching loop further down calls StringMatcher(), which is presumably
+# defined by this file -- confirm before running on another machine.
+execfile('/Users/alo/Downloads/python-Levenshtein-0.10.2/StringMatcher.py')
+
+# Directory from which features.xml is read.
+jdir = '/Users/alo/Development/MIR/jAudio/jAudio/'
+
+# Filled by the matching loop: feature name -> {'score': ..., 'name': ...}.
+jdict = {}
+
+# Read the whole feature description from the handle opened here. The handle
+# is deliberately not called `file` (a Python 2 builtin), and try/finally
+# guarantees it is closed even when the read raises.
+features_file = urllib2.urlopen('file://' + jdir + 'features.xml')
+try:
+    data = features_file.read()
+finally:
+    features_file.close()
+
+# Parse the XML text and collect every <feature> element.
+dom = parseString(data)
+jmir = dom.getElementsByTagName('feature')
+
+# For each <feature> element, look for the most similar feature name already
+# stored in the `current` graph and record the best match in jdict.
+feature_predicate = URIRef('http://sovarr.c4dm.eecs.qmul.ac.uk/features/feature')
+
+for nodes in jmir:
+    # Name = last dot-separated component of the text held by the element's
+    # second child node (presumably the feature's class name -- verify
+    # against features.xml).
+    jname = nodes.childNodes[1].firstChild.nodeValue.split('.')[-1]
+    jdict[jname] = {'score': 0, 'name': ""}
+    # Lowest normalised distance seen so far; initialised high so that the
+    # first candidate normally replaces it.
+    score = 100
+    jliteral = Literal(jname)
+
+    for s, _p, _o in current.triples((None, None, RDFS.Resource)):
+        for _cs, _cp, name in current.triples((s, feature_predicate, None)):
+            m = StringMatcher()
+            m.set_seqs(jliteral, name)
+            # Distance (presumably Levenshtein) divided by the mean length of
+            # the two names; smaller means more similar.
+            sc = float(m.distance()) / ((len(jname) + len(name)) / 2.0)
+            if sc < score:
+                jdict[jname]['score'] = 1.0 - sc
+                jdict[jname]['name'] = name
+                score = sc
+
+    # Weak match: fall back to the abbreviation table. The result is stored
+    # in jdict, because that is the only dictionary the emission loop reads.
+    if jdict[jname]['score'] < 0.75:
+        for k, abbreviation in abbr.items():
+            if abbreviation == jname:
+                jdict[jname]['score'] = 1.0
+                jdict[jname]['name'] = k
+
+# Add triples to afgr for every feature collected in jdict.
+for feature_key, match in jdict.items():
+    matched = match['score'] > 0.75
+    # A matched feature is identified by the name it matched; an unmatched
+    # one by its own key. Spaces and hyphens are stripped from the label.
+    label = match['name'] if matched else feature_key
+    subject = URIRef(local + label.replace(' ', '').replace('-', ''))
+
+    if not matched:
+        # Unmatched features are additionally typed as a resource and given
+        # a 'feature' literal carrying their key.
+        afgr.add((
+            subject,
+            URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
+            URIRef(u'http://www.w3.org/2000/01/rdf-schema#Resource'),
+        ))
+        afgr.add((subject, URIRef(local + 'feature'), Literal(feature_key)))
+
+    # Both cases record that the feature is computed by jMIR.
+    afgr.add((subject, URIRef(local + 'computedBy'), Literal('jMIR')))