annotate pdfextract/writeRDFs.py @ 18:d5012016bf64 tip

added rdfpy and rdfonto directories
author nothing@tehis.net
date Tue, 23 Apr 2013 11:49:20 +0100
parents 365a37a2fb6c
children
rev   line source
nothing@1 1 import rdflib, os, fnmatch, urllib2
nothing@1 2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
nothing@1 3 from xml.dom.minidom import parseString
nothing@1 4
############# Vamp ###############
# Merge every Vamp plugin description (*.n3) found in `vampdir` into `source`,
# then write a condensed record per plugin (feature name, input domain,
# description, maker, output type) to af-Vamp.rdf.

vampdir = '/Users/alo/Library/Audio/Plug-Ins/Vamp/'

source = Graph()

graph = Graph()
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))

# Namespace prefixes of the terms looked up in the plugin descriptions.
vamp_ns = 'http://purl.org/ontology/vamp/'
dc_ns = 'http://purl.org/dc/elements/1.1/'
foaf_ns = 'http://xmlns.com/foaf/0.1/'


def add_vamp_output_kinds(subject_id, output_node):
    """Record the Sparse/Dense output kind(s) of `output_node` on `subject_id`.

    For every rdf:type of `output_node` in `source`, the last path segment of
    the type URI is taken with 'Output' removed; it is added to `graph` as a
    local:output literal only when it contains 'Sparse' or 'Dense'.
    """
    for output_type in source.objects(output_node, RDF.type):
        out = output_type.split('/')[-1].replace('Output', '')
        if 'Sparse' in out or 'Dense' in out:
            graph.add((
                URIRef(subject_id),
                URIRef(local + 'output'),
                Literal(out)
            ))


for filename in os.listdir(vampdir):
    if fnmatch.fnmatch(filename, '*.n3'):
        print (vampdir + filename)
        source.parse(vampdir + filename, format='n3')

# NOTE(review): the plugins are walked once, after every file has been merged
# into `source`. `graph` holds a set of triples, so re-scanning after each
# parsed file would not add further statements -- confirm against the
# original layout.
for plugin, _ in source.subject_predicates(URIRef(vamp_ns + 'Plugin')):
    # Reset per plugin so a plugin lacking one of these properties never
    # inherits the value left over from the previously processed plugin.
    plugin_id = None
    feature = None
    dom = None
    makername = None

    for plugin_name in source.objects(plugin, URIRef(vamp_ns + 'name')):
        plugin_id = plugin_name.replace(' ', '').replace('Marsyas-BatchFeatureExtract-', '')
        feature = plugin_name
        graph.add((
            URIRef(plugin_id),
            RDF.type,
            RDFS.Resource
        ))
        graph.add((
            URIRef(plugin_id),
            URIRef(local + 'feature'),
            Literal(feature)
        ))

    if plugin_id is None:
        # Without a vamp:name there is no subject to attach statements to.
        continue

    for domain in source.objects(plugin, URIRef(vamp_ns + 'input_domain')):
        dom = domain.split('/')[-1].replace('Domain', '')
        # The text is lower-cased first, so the pattern must be lower-case
        # too: 'TimeDomain' -> 'time' -> 'temporal'.
        dom = dom.lower().replace('time', 'temporal')
        graph.add((
            URIRef(plugin_id),
            URIRef(local + 'domain'),
            Literal(dom)
        ))

    for desc in source.objects(plugin, URIRef(dc_ns + 'description')):
        # Collapse runs of whitespace (including line breaks) to one space.
        description = " ".join(desc.split())
        graph.add((
            URIRef(plugin_id),
            URIRef(dc_ns + 'description'),
            Literal(description)
        ))

    for maker in source.objects(plugin, URIRef(foaf_ns + 'maker')):
        for mname in source.objects(maker, URIRef(foaf_ns + 'name')):
            makername = mname
            graph.add((
                URIRef(plugin_id),
                URIRef(local + 'source'),
                Literal(makername)
            ))

    outputs = list(source.objects(plugin, URIRef(vamp_ns + 'output')))

    if len(outputs) == 1:
        # A single output is described on the plugin's own record.
        add_vamp_output_kinds(plugin_id, outputs[0])
    else:
        # Otherwise each output whose title differs from the plugin name gets
        # its own record, carrying the plugin's domain and maker.
        for output_node in outputs:
            for title in source.objects(output_node, URIRef(dc_ns + 'title')):
                if title != feature:
                    subid = title.replace(' ', '')
                    graph.add((
                        URIRef(subid),
                        RDF.type,
                        RDFS.Resource
                    ))
                    graph.add((
                        URIRef(subid),
                        URIRef(local + 'feature'),
                        Literal(title + " (" + feature + ")")
                    ))
                    if dom is not None:
                        graph.add((
                            URIRef(subid),
                            URIRef(local + 'domain'),
                            Literal(dom)
                        ))
                    if makername is not None:
                        graph.add((
                            URIRef(subid),
                            URIRef(local + 'source'),
                            Literal(makername)
                        ))
                    add_vamp_output_kinds(subid, output_node)

graph.serialize('/Users/alo/MusicOntology/features/rdf/af-Vamp.rdf')
nothing@1 110
############# Marsyas ###############
# Scan the Marsyas headers for files tagged "\ingroup Analysis" and write the
# documented class name of each, with its brief description, to
# af-Marsyas.rdf.
mdir = '/Users/alo/Development/MIR/marsyas-0.4.7/src/marsyas/'

madict = {}

graph = Graph()
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))

for header in os.listdir(mdir):
    if not fnmatch.fnmatch(header, '*.h'):
        continue

    # Close the header as soon as its lines are read.
    with open(mdir + header) as handle:
        code = [line.strip() for line in handle]

    if not any(r'\ingroup Analysis' in line for line in code):
        continue

    cl = ''
    for i, line in enumerate(code):
        if r'\class' in line:
            cl = line.split(' ')[-1]
            # The description is read two lines below the \class line, with
            # its first 7 characters dropped (presumably the '\brief ' tag --
            # confirm against the headers); a non-empty line right after it
            # is treated as its continuation.
            following = code[i + 2:i + 4]
            brief = following[0][7:] if following else ''
            if len(following) > 1 and following[1] != '':
                # Lines are stripped, so re-insert the separating space.
                brief += ' ' + following[1]
            madict[cl] = {'brief': brief}
            break

    if cl not in madict:
        # Tagged header without a \class line: nothing to describe.
        continue

    graph.add((
        URIRef(cl),
        RDF.type,
        RDFS.Resource
    ))

    graph.add((
        URIRef(cl),
        URIRef('http://purl.org/dc/elements/1.1/description'),
        Literal(madict[cl]['brief'])
    ))

graph.serialize('/Users/alo/MusicOntology/features/rdf/af-Marsyas.rdf')
nothing@1 157
############# jMIR ###############
# For every <feature> listed in jAudio's features.xml, read the matching Java
# source file and write the feature's name and description strings to
# af-jMIR.rdf.
jdir = '/Users/alo/Development/MIR/jAudio/jAudio/'
jsrc = '/Users/alo/Development/MIR/jAudio/jAudio/src/jAudioFeatureExtractor/AudioFeatures/'


def java_string_after(code, marker, terminator):
    """Return the text of the first double-quoted string following `marker`.

    The result starts right after the first '"' found past `marker` and ends
    just before the next occurrence of `terminator`. An empty string is
    returned when the marker, the opening quote or the terminator is missing.
    """
    marker_pos = code.find(marker)
    if marker_pos < 0:
        return ''
    quote_pos = code.find('"', marker_pos + len(marker))
    if quote_pos < 0:
        return ''
    start = quote_pos + 1
    end = code.find(terminator, start)
    if end < 0:
        return ''
    return code[start:end]


# features.xml is a local file, so it is read directly.
with open(jdir + 'features.xml') as handle:
    data = handle.read()

dom = parseString(data)
jmir = dom.getElementsByTagName('feature')

graph = Graph()
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))

for nodes in jmir:
    # The text of the element's second child node is split on '.' and the
    # last piece is used as the Java file name (presumably a qualified class
    # name -- confirm against features.xml).
    jname = nodes.childNodes[1].firstChild.nodeValue.split('.')[-1]
    graph.add((
        URIRef(jname),
        RDF.type,
        RDFS.Resource
    ))

    with open(jsrc + jname + '.java') as handle:
        code = handle.read()

    feature_name = java_string_after(code, 'String name =', '"')
    if feature_name:
        graph.add((
            URIRef(jname),
            URIRef(local + 'name'),
            Literal(feature_name)
        ))

    # The description may be built from several concatenated Java string
    # literals, so read up to the closing '";' and strip the concatenation
    # syntax and line breaks before normalising whitespace.
    desc = java_string_after(code, 'String description', '";')
    for junk in ('\r', '\t', '\n', '+', '"', ';'):
        desc = desc.replace(junk, '')
    desc = " ".join(desc.split())

    if desc:
        graph.add((
            URIRef(jname),
            URIRef('http://purl.org/dc/elements/1.1/description'),
            Literal(desc)
        ))

graph.serialize('/Users/alo/MusicOntology/features/rdf/af-jMIR.rdf')
nothing@1 213
nothing@1 214
############# yaafe ###############
# Extract every AudioFeature subclass declared in yaafefeatures.py and write
# its name, with the text found two lines below the class statement as its
# description, to af-Yaafe.rdf.

graph = Graph()
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))

path = "/Users/alo/Development/MIR/yaafe-v0.64/src_python/yaafefeatures.py"

with open(path) as handle:
    lines = [line.strip() for line in handle]

for count, ln in enumerate(lines):
    # Lines are stripped, so a class statement starts at column 0; requiring
    # the base-class marker keeps the slice bound below from being -1.
    if not (ln.startswith('class ') and '(AudioFeature)' in ln):
        continue

    yname = ln[6:ln.index('(AudioFeature)')]

    # The description is taken from the second line after the class line.
    desc = lines[count + 2] if count + 2 < len(lines) else ''
    desc = desc.replace("`", "").replace("<", "").replace(">", "")
    desc = " ".join(desc.split())

    graph.add((
        URIRef(yname),
        RDF.type,
        RDFS.Resource
    ))

    graph.add((
        URIRef(yname),
        URIRef('http://purl.org/dc/elements/1.1/description'),
        Literal(desc)
    ))

graph.serialize('/Users/alo/MusicOntology/features/rdf/af-Yaafe.rdf')
nothing@1 250
############# libXtract ###############
# List the members of the `xtract_features_` enum in libxtract.h and write
# one resource per member to af-libXtract.rdf.
graph = Graph()
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))

path = '/Users/alo/Development/MIR/LibXtract/xtract/libxtract.h'

with open(path) as handle:
    lines = [line.strip() for line in handle]

# Members run from the line after the enum opener up to, but excluding, the
# line directly before XTRACT_WINDOWED (presumably a comment -- confirm
# against the header).
first_member = lines.index('enum xtract_features_ {') + 1
last_member = lines.index('XTRACT_WINDOWED') - 1
xtract = lines[first_member:last_member]

for ln in xtract:
    if '_' not in ln:
        # Blank or otherwise prefix-less line: not an enum member.
        continue

    # Drop the prefix up to the first '_' and the final character (the
    # trailing comma on member lines), e.g.
    # 'XTRACT_SPECTRAL_CENTROID,' -> 'Spectral centroid'.
    # NOTE(review): the identifier keeps its spaces, unlike the Vamp ids,
    # which strip them -- confirm this is intended.
    xname = ln[(ln.index('_') + 1):-1].replace("_", " ").capitalize()

    graph.add((
        URIRef(xname),
        RDF.type,
        RDFS.Resource
    ))

graph.serialize('/Users/alo/MusicOntology/features/rdf/af-libXtract.rdf')
nothing@1 264 xname = ln[(ln.index('_')+1):-1].replace("_", " ").lower().capitalize()
nothing@1 265
nothing@1 266 graph.add((
nothing@1 267 URIRef(xname),
nothing@1 268 URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
nothing@1 269 URIRef('http://www.w3.org/2000/01/rdf-schema#Resource')
nothing@1 270 ))
nothing@1 271
nothing@1 272 graph.serialize('/Users/alo/MusicOntology/features/rdf/af-libXtract.rdf')