comparison pdfextract/parsejAudio.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children
comparison
equal deleted inserted replaced
0:62d2c72e4223 1:365a37a2fb6c
1 import rdflib, os, fnmatch, urllib2
2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal
3 from xml.dom.minidom import parseString
4
# Pull StringMatcher into this namespace by executing its source file.
# NOTE(review): execfile runs whatever is at this hard-coded, machine-specific
# path; importing an installed python-Levenshtein would be safer -- confirm the
# target environment before changing.
execfile('/Users/alo/Downloads/python-Levenshtein-0.10.2/StringMatcher.py')

# Root of the local jAudio checkout (machine-specific path).
jdir = '/Users/alo/Development/MIR/jAudio/jAudio/'

# Maps each jAudio feature name to its best match found so far:
# {'score': similarity, 'name': matched feature name}.
jdict = {}

# Read the jAudio feature catalogue.  The previous code called read() on an
# unbound name (`xml`) instead of the handle it had just opened; read from the
# handle and make sure it is closed even if the read fails.
features_file = urllib2.urlopen('file://' + jdir + 'features.xml')
try:
    data = features_file.read()
finally:
    features_file.close()

dom = parseString(data)
# Every <feature> element found in features.xml.
jmir = dom.getElementsByTagName('feature')
17
# Match every jAudio feature against the feature names already present in the
# `current` graph, keeping the closest name (by normalised edit distance) in
# jdict.  `current` and `abbr` are expected to be defined before this script
# runs -- they are not created in this file.
feature_predicate = URIRef('http://sovarr.c4dm.eecs.qmul.ac.uk/features/feature')

# The candidate names do not depend on the jAudio feature being matched, so
# walk the graph once instead of once per feature.
known_names = [
    name
    for s, p, o in current.triples((None, None, RDFS.Resource))
    for cs, cp, name in current.triples((s, feature_predicate, None))
]

for nodes in jmir:
    # NOTE(review): assumes the second child node of <feature> carries a
    # dotted class name as text; only the last dotted component is kept.
    jname = nodes.childNodes[1].firstChild.nodeValue.split('.')[-1]
    jdict[jname] = {'score': 0, 'name': ""}
    # Lowest normalised distance seen so far; starts high so that the first
    # candidate compared is normally accepted.
    score = 100

    for name in known_names:
        m = StringMatcher()
        m.set_seqs(Literal(jname), name)
        # Distance divided by the mean length of the two strings.
        sc = float(m.distance()) / ((len(jname) + len(name)) / 2.0)
        if sc < score:
            jdict[jname]['score'] = 1.0 - sc
            jdict[jname]['name'] = name
            score = sc

    # Weak match: fall back to the abbreviation table.  The result has to be
    # stored on this feature's own jdict entry (the earlier code wrote to
    # `madict[cl]`, names that do not exist in this script).
    if jdict[jname]['score'] < 0.75:
        for k, abbreviation in abbr.items():
            if abbreviation == jname:
                jdict[jname]['score'] = 1.0
                jdict[jname]['name'] = k
40
# Record in the `afgr` graph that each jAudio feature is computed by jMIR.
# Features that matched an existing name (score above 0.75) are attached to
# the resource derived from that name; the rest get a new resource derived
# from the jAudio name, typed as rdfs:Resource and labelled with that name.
# `afgr` and `local` are expected to be defined before this script runs.
for k, match in jdict.items():
    if match['score'] > 0.75:
        name = match['name']
        subject = URIRef(local + (name.replace(' ', '').replace('-', '')))
    else:
        subject = URIRef(local + (k.replace(' ', '').replace('-', '')))
        # Unmatched feature: declare the new resource and give it its name.
        afgr.add((subject, RDF.type, RDFS.Resource))
        afgr.add((subject, URIRef(local + 'feature'), Literal(k)))

    # Both kinds of feature are marked as computed by jMIR.
    afgr.add((subject, URIRef(local + 'computedBy'), Literal('jMIR')))