comparison rdfpy/writeBaseOnto.py @ 0:62d2c72e4223

initial commit
author nothing@tehis.net
date Mon, 25 Feb 2013 14:40:54 +0000
parents
children 53069717108c
comparison
equal deleted inserted replaced
-1:000000000000 0:62d2c72e4223
1 import rdflib, os, fnmatch, urllib2
2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL, XSD, Namespace
3 from xml.dom.minidom import parseString
4
def _read_lines(path):
    """Return every line of the text file at *path*, stripped of surrounding whitespace."""
    # The context manager closes the handle as soon as the lines are read,
    # instead of leaving it to the garbage collector.
    with open(path) as handle:
        return [line.strip() for line in handle]


# Per-feature input tables. They are walked in parallel further down: the
# feature at position i in `names` is described by cat[i] and sig[i].
# NOTE(review): these three paths are relative to the working directory,
# while the component lists are read from basedir + 'pdfextract/' -- confirm
# that the script is always started from basedir.
names = _read_lines('pdfextract/names.txt')
cat = _read_lines('pdfextract/categories.txt')
sig = _read_lines('pdfextract/sig.txt')

# Directory holding the remaining input files.
basedir = '/Users/alo/MusicOntology/features/'

# Base URI under which every ontology term is minted.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# External vocabularies referenced by the ontology.
DC = Namespace(u"http://purl.org/dc/elements/1.1/")
VS = Namespace(u"http://www.w3.org/2003/06/sw-vocab-status/ns#")
15
# Create the graph and register the prefixes used for the serialized output.
graph = Graph()
for prefix, namespace_uri in (
    ('af', local),
    ('dc', 'http://purl.org/dc/elements/1.1/'),
    ('owl', 'http://www.w3.org/2002/07/owl#'),
    ('xsd', 'http://www.w3.org/2001/XMLSchema#'),
    ('vs', 'http://www.w3.org/2003/06/sw-vocab-status/ns#'),
):
    graph.bind(prefix, URIRef(namespace_uri))

# Ontology header. The subject is the empty URI reference -- presumably so
# that the statements describe the serialized document itself.
ontology = URIRef('')
for header_predicate, header_value in (
    (RDF.type, OWL.Ontology),
    (DC['title'], Literal("Audio Features Base Ontology")),
    (OWL.versionInfo, Literal("Version 0.1")),
    (DC['description'], Literal("This is a base ontology for the Audio Features engineering process collected from literature")),
):
    graph.add((ontology, header_predicate, header_value))

# vs:term_status is attached to terms below, so declare it as an annotation property.
graph.add((VS['term_status'], RDF.type, OWL.AnnotationProperty))
52
# Position of the current feature inside the parallel cat/sig tables.
i = 0

# Feature names that mark a change of domain, in the sequence they are
# expected to be encountered while walking the feature names.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features",
]

# Domain label started by each marker name; listed in the same sequence as `order`.
domains = dict(zip(order, (
    'temporal',
    'frequency',
    'cepstral',
    'modulation frequency',
    'eigendomain',
    'phase space',
)))

# Abbreviations for the features that have one.
abbr = dict([
    ("Zero Crossing Rate", "ZCR"),
    ("Mel-scale Frequency Cepstral Coefficients", "MFCC"),
    ("Linear Predictive Coding", "LPC"),
    ("Linear Prediction Cepstral Coefficients", "LPCC"),
    ("Zero crossing peak amplitudes", "ZCPA"),
    ("Line spectral frequencies", "LSF"),
    ("Short-time energy", "STE"),
    ("Amplitude descriptor", "AD"),
    ("Adaptive time frequency transform", "ATFT"),
    ("Daubechies Wavelet coefficient histogram", "DWCH"),
    ("Spectral Flux", "SF"),
    ("Group delay function", "GDF"),
    ("Modified group delay function", "MGDF"),
    ("Spectral centroid", "SC"),
    ("Subband spectral flux", "SSF"),
    ("Perceptual linear prediction", "PLP"),
])

# Application-domain codes and their readable titles; an empty title means
# that no application-domain class is generated for the code.
appdom = dict(
    ASR="Speech Recognition",
    ESR="Environmental Sound Recognition",
    MIR="Music Information Retrieval",
    AS="Audio Segmentation",
    FP="Fingerprinting",
    VAR="Several",
    EXC="",
)

# Mutable state for the feature loop: the current domain label and the index
# of the next marker expected from `order`.
domain, domainIndex = "", 0

# Filled while reading the component lists: one-character key -> class URI.
compdict = {}
105
# Root class for computation steps, followed by its three direct subclasses.
operation_root = URIRef(local + 'MathematicalOperation')
graph.add((operation_root, RDF.type, OWL.Class))

for operation_kind in ('Filter', 'Transformation', 'Aggregation'):
    operation_class = URIRef(local + operation_kind)
    graph.add((operation_class, RDF.type, OWL.Class))
    graph.add((operation_class, RDFS.subClassOf, operation_root))
143
# Read the three component lists and declare one class per entry.
# Each list file is paired with the operation class its entries specialise.
for filename, compsuper in (
    ('filters', 'Filter'),
    ('trans', 'Transformation'),
    ('aggr', 'Aggregation'),
):
    # `with` closes the file once its entries have been processed.
    with open(basedir + 'pdfextract/' + filename + '.txt') as component_file:
        for line in component_file:
            line = line.strip()
            if not line:
                # A blank line has neither key nor name; indexing it would fail.
                continue

            # The first character is the lookup key and the name starts at
            # index 2 -- presumably "<key> <name>" with a one-character
            # separator; confirm against the data files.
            compname = line[2:]

            # Drop the characters that are not wanted in the class identifier.
            local_name = compname
            for unwanted in (' ', '(', ')', '-', ','):
                local_name = local_name.replace(unwanted, '')
            compidref = URIRef(local + local_name)

            graph.add((compidref, RDF.type, OWL.Class))
            graph.add((compidref, RDFS.subClassOf, URIRef(local + compsuper)))
            graph.add((compidref, RDFS.label, Literal(compname)))

            # Remembered so that computation step keys can be resolved later.
            compdict[line[0]] = compidref
165
# Top of the feature taxonomy: Signal, and Feature declared as its subclass.
signal_class = URIRef(local + 'Signal')
feature_class = URIRef(local + 'Feature')

graph.add((signal_class, RDF.type, OWL.Class))
graph.add((feature_class, RDF.type, OWL.Class))

# subClassOf belongs to the RDFS vocabulary. The OWL vocabulary has no such
# term, so owl:subClassOf would emit a predicate no reasoner understands (or
# fail outright where rdflib treats the OWL namespace as a closed set).
graph.add((feature_class, RDFS.subClassOf, signal_class))
183
# One feature class per domain label, each a direct subclass of Feature.
# The class name is the label with only its first letter upper-cased
# (capitalize() lower-cases the rest), spaces removed, and 'Feature' appended.
feature_root = URIRef(local + 'Feature')
for domain_label in domains.values():
    class_name = domain_label.capitalize().replace(' ', '') + 'Feature'
    domain_class = URIRef(local + class_name)
    graph.add((domain_class, RDF.type, OWL.Class))
    graph.add((domain_class, RDFS.subClassOf, feature_root))
196
# Perceptual / physical interpretation classes. For each interpretation:
#   * the general class is a subclass of Feature;
#   * the frequency-domain variant is a subclass of FrequencyFeature and is
#     additionally declared equivalent to the general class.
# The graph is a set of triples, so the sequence of the additions is irrelevant.
frequency_feature = URIRef(local + 'FrequencyFeature')
any_feature = URIRef(local + 'Feature')

for general_name, frequency_name in (
    ('PerceptualFeature', 'FrequencyDomainPerceptualFeature'),
    ('PhysicalFeature', 'FrequencyDomainPhysicalFeature'),
):
    general_class = URIRef(local + general_name)
    frequency_class = URIRef(local + frequency_name)

    graph.add((general_class, RDF.type, OWL.Class))
    graph.add((general_class, RDFS.subClassOf, any_feature))

    graph.add((frequency_class, RDF.type, OWL.Class))
    graph.add((frequency_class, RDFS.subClassOf, frequency_feature))
    graph.add((frequency_class, OWL.equivalentClass, general_class))
252
# Stand-alone class used as the 'dimensions' value of features whose
# dimension count is not a fixed number.
graph.add((URIRef(local + 'ParametrizedDimensions'), RDF.type, OWL.Class))

# Two small hierarchies: a parent class with three direct subclasses each.
for parent_name, child_names in (
    ('ComputationalComplexity', ('LowComplexity', 'MediumComplexity', 'HighComplexity')),
    ('TemporalScale', ('IntraFrame', 'InterFrame', 'Global')),
):
    parent_class = URIRef(local + parent_name)
    graph.add((parent_class, RDF.type, OWL.Class))
    for child_name in child_names:
        child_class = URIRef(local + child_name)
        graph.add((child_class, RDF.type, OWL.Class))
        graph.add((child_class, RDFS.subClassOf, parent_class))
330
331
# ApplicationDomain plus one subclass per application-domain title.
application_root = URIRef(local + 'ApplicationDomain')
graph.add((application_root, RDF.type, OWL.Class))

for application_title in appdom.values():
    if application_title == "":
        # Codes with an empty title do not get a class.
        continue
    application_class = URIRef(local + application_title.replace(" ", ""))
    graph.add((application_class, RDF.type, OWL.Class))
    graph.add((application_class, RDFS.subClassOf, application_root))
351
#properties
# Declares the properties attached to each feature class. Every property is an
# rdf:Property; range, vs:term_status and rdfs:comment are added only where
# the original declarations had them.
testing_status = Literal("testing")

application_domain_prop = URIRef(local + "application_domain")
graph.add((application_domain_prop, RDF.type, RDF.Property))
graph.add((application_domain_prop, RDFS.range, URIRef(local + 'ApplicationDomain')))
graph.add((application_domain_prop, VS['term_status'], testing_status))
graph.add((application_domain_prop, RDFS.comment, Literal("application domain property")))

semantic_interpretation_prop = URIRef(local + "semantic_interpretation")
graph.add((semantic_interpretation_prop, RDF.type, RDF.Property))
graph.add((semantic_interpretation_prop, VS['term_status'], testing_status))

computational_complexity_prop = URIRef(local + "computational_complexity")
graph.add((computational_complexity_prop, RDF.type, RDF.Property))
graph.add((computational_complexity_prop, VS['term_status'], testing_status))
graph.add((computational_complexity_prop, RDFS.range, URIRef(local + 'ComputationalComplexity')))

# XSD datatype local names are case-sensitive and lower-case: xsd:boolean and
# xsd:integer. The capitalised forms do not name any XSD datatype.
psychoacoustic_model_prop = URIRef(local + "psychoacoustic_model")
graph.add((psychoacoustic_model_prop, RDF.type, RDF.Property))
graph.add((psychoacoustic_model_prop, RDFS.range, XSD.boolean))
graph.add((psychoacoustic_model_prop, VS['term_status'], testing_status))

# NOTE(review): two rdfs:range statements on one property are read as an
# intersection under RDFS semantics; if "integer OR ParametrizedDimensions"
# is what is meant, this needs an owl:unionOf range -- confirm the intent.
dimensions_prop = URIRef(local + "dimensions")
graph.add((dimensions_prop, RDF.type, RDF.Property))
graph.add((dimensions_prop, RDFS.range, XSD.integer))
graph.add((dimensions_prop, RDFS.range, URIRef(local + 'ParametrizedDimensions')))

temporal_scale_prop = URIRef(local + "temporal_scale")
graph.add((temporal_scale_prop, RDF.type, RDF.Property))
graph.add((temporal_scale_prop, RDFS.range, URIRef(local + 'TemporalScale')))
447
# Emit one OWL class per feature name, described by the matching row of `cat`
# (space-separated category columns) and `sig` (space-separated computation
# step keys).
#
# Category columns as read below:
#   word[0]  temporal scale code: I, X or G
#   word[1]  'Y' for a perceptual feature, anything else for a physical one
#   word[2]  'Y' when a psychoacoustic model is involved
#   word[3]  complexity code: L, M or H
#   word[4]  'V' for parametrized dimensions, otherwise an integer
#   word[5]  application-domain code, a key of `appdom`

# Code -> class tables, built once instead of on every iteration.
_TEMPORAL_SCALE_BY_CODE = {
    'I': URIRef(local + 'IntraFrame'),
    'X': URIRef(local + 'InterFrame'),
    'G': URIRef(local + 'Global'),
}
_COMPLEXITY_BY_CODE = {
    'L': URIRef(local + 'LowComplexity'),
    'M': URIRef(local + 'MediumComplexity'),
    'H': URIRef(local + 'HighComplexity'),
}

for name in names:
    compact_name = name.replace(' ', '').replace('-', '')
    feature = URIRef(local + compact_name)

    # Read this feature's category row before anything consults it. It was
    # previously read only after the frequency-domain branch had already used
    # it, so that branch saw the previous feature's row (or no row at all on
    # the first iteration).
    word = cat[i].split(' ')
    is_perceptual = word[1] == 'Y'

    # A marker name switches the current domain; the features that follow stay
    # in that domain until the next marker. The bounds check keeps the lookup
    # valid once the last marker has been consumed.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    graph.add((feature, RDF.type, OWL.Class))
    graph.add((feature, VS['term_status'], Literal("testing")))

    # Frequency-domain features are filed under the perceptual or physical
    # frequency class; every other feature goes under its domain's class.
    if domain == "frequency":
        if is_perceptual:
            parent_name = 'FrequencyDomainPerceptualFeature'
        else:
            parent_name = 'FrequencyDomainPhysicalFeature'
    else:
        parent_name = domain.capitalize().replace(' ', '') + 'Feature'
    graph.add((feature, RDFS.subClassOf, URIRef(local + parent_name)))

    # Two labels are kept on purpose: the compact identifier and the readable name.
    graph.add((feature, RDFS.label, Literal(compact_name)))
    graph.add((feature, RDFS.comment, Literal(name + " feature")))
    graph.add((feature, RDFS.label, Literal(name)))

    graph.add((
        feature,
        URIRef(local + 'temporal_scale'),
        _TEMPORAL_SCALE_BY_CODE[word[0]]
    ))

    if is_perceptual:
        interpretation = URIRef(local + 'PerceptualFeature')
    else:
        interpretation = URIRef(local + 'PhysicalFeature')
    graph.add((feature, URIRef(local + "semantic_interpretation"), interpretation))

    graph.add((
        feature,
        URIRef(local + "psychoacoustic_model"),
        Literal(word[2] == 'Y')
    ))

    graph.add((
        feature,
        URIRef(local + "computational_complexity"),
        _COMPLEXITY_BY_CODE[word[3]]
    ))

    if word[4] == 'V':
        dimension_value = URIRef(local + 'ParametrizedDimensions')
    else:
        dimension_value = Literal(int(word[4]))
    graph.add((feature, URIRef(local + 'dimensions'), dimension_value))

    # Codes mapped to an empty title carry no application domain.
    application_title = appdom[word[5]]
    if application_title != '':
        graph.add((
            feature,
            URIRef(local + "application_domain"),
            URIRef(local + application_title.replace(" ", ""))
        ))

    # Link the feature to each of its computation steps.
    steps = sig[i].split(' ')
    for key in steps:
        if not key:
            # An empty row or doubled space yields '' which is not a step key.
            continue
        graph.add((feature, URIRef(local + 'computation'), compdict[key]))

    if 'MPEG-7' in name:
        graph.add((feature, URIRef(local + 'computedIn'), Literal('MPEG-7')))

    if name in abbr:
        graph.add((feature, URIRef(local + 'abbreviation'), Literal(abbr[name])))

    # Advance to the next row of the parallel cat/sig tables.
    i += 1
605
606
607
# Write the ontology twice, as N3 and as RDF/XML, into basedir.
# The format of the .rdf file is spelled out because the default of
# Graph.serialize differs between rdflib releases (RDF/XML in older ones,
# Turtle from 6.0 on); relying on it could put Turtle into a .rdf file.
graph.serialize(basedir + 'baseOnto.n3', format='n3')
graph.serialize(basedir + 'baseOnto.rdf', format='xml')