comparison pdfextract/writeBase.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children 8bd8453e0551
comparison
equal deleted inserted replaced
0:62d2c72e4223 1:365a37a2fb6c
1 import rdflib, os, fnmatch, urllib2
2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
3 from xml.dom.minidom import parseString
4
def _read_stripped_lines(path):
    """Return the lines of the text file at ``path`` with surrounding whitespace stripped.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the lines have been read, instead of being left open until the
    interpreter happens to collect it.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


# Three parallel input files: entry N of each list describes the same feature.
# names -> feature names, used to build each resource URI and label
# cat   -> space-separated category codes, split per feature further down
# sig   -> space-separated keys that are looked up in ``compdict``
names = _read_stripped_lines('pdfextract/names.txt')
cat = _read_stripped_lines('pdfextract/categories.txt')
sig = _read_stripped_lines('pdfextract/sig.txt')
8
# Base URI under which every feature resource and every property used by
# this script is created.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# The one graph that the rest of the script fills in and finally serializes.
graph = Graph()

# Bind a short prefix to each namespace, in the same order as before.
for prefix, namespace in (
    ('local', local),
    ('dc', 'http://purl.org/dc/elements/1.1/'),
    ('owl', 'http://www.w3.org/2002/07/owl#'),
):
    graph.bind(prefix, URIRef(namespace))
15
# Index into the parallel ``cat`` / ``sig`` lists while walking ``names``.
i = 0

# Single source of truth for the domain markers: each pair is the feature
# name that opens a domain and the domain label that applies from that
# feature onwards. ``order`` and ``domains`` are both derived from this table
# so the two can never disagree.
_DOMAIN_MARKERS = [
    ("Zero Crossing Rate", 'temporal'),
    ("Linear Predictive Coding", 'frequency'),
    ("Mel-scale Frequency Cepstral Coefficients", 'cepstral'),
    ("Auditory Filter Bank Temporal Envelopes", 'modulation frequency'),
    ("Rate-scale-frequency Features", 'eigendomain'),
    ("Phase Space Features", 'phase space'),
]

# Marker feature names in the order they are expected to be encountered.
order = [marker[0] for marker in _DOMAIN_MARKERS]

# Marker feature name -> domain label.
domains = dict(_DOMAIN_MARKERS)

# Feature name -> abbreviation; only features listed here get an
# ``abbreviation`` property.
abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero crossing peak amplitudes": "ZCPA",
    "Line spectral frequencies": "LSF",
    "Short-time energy": "STE",
    "Amplitude descriptor": "AD",
    "Adaptive time frequency transform": "ATFT",
    "Daubechies Wavelet coefficient histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group delay function": "GDF",
    "Modified group delay function": "MGDF",
    "Spectral centroid": "SC",
    "Subband spectral flux": "SSF",
    "Perceptual linear prediction": "PLP",
}
54
55
# Domain label applied to the feature currently being processed; it stays
# empty until the first marker feature in ``order`` is encountered.
domain = ""
# Position in ``order`` of the next marker feature to look for.
domainIndex = 0
# Single-character step key -> description of that computation step.
compdict = {}

# Each component file holds one entry per line: a one-character key at
# index 0, a separator character at index 1, and the description from
# index 2 onwards.
for filename in ['filters', 'trans', 'aggr']:
    with open('pdfextract/' + filename + '.txt') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # A blank line (for instance a trailing newline at the end
                # of the file) has no key; indexing it would raise IndexError.
                continue
            compdict[line[0]] = line[2:]
63
64
65
# Expansions for the single-letter / short codes found in categories.txt.
# Defined once here instead of being rebuilt for every feature.
_TEMPORAL_SCALES = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global',
}
_COMPLEXITIES = {
    'L': 'low',
    'M': 'medium',
    'H': 'high',
}
# 'EXC' expands to the empty string, which suppresses the appdomain triple.
_APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': '',
}


def _add_feature_property(subject, prop, value):
    """Add the triple (subject, <local + prop>, Literal(value)) to ``graph``."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


# ``names``, ``cat`` and ``sig`` are parallel lists; ``i`` selects the entry
# of ``cat`` and ``sig`` that belongs to the current name.
for i, name in enumerate(names):
    # Spaces and hyphens are dropped to form both the URI fragment and the
    # value of the ``feature`` property.
    compact_name = name.replace(' ', '').replace('-', '')
    subject = URIRef(local + compact_name)

    # ``order`` lists the feature that opens each domain; every feature from
    # that one onwards gets the matching domain label. The bounds check keeps
    # the lookup from raising IndexError for features that come after the
    # last marker has been consumed.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    graph.add((subject, RDF.type, OWL.Class))
    _add_feature_property(subject, 'feature', compact_name)
    _add_feature_property(subject, 'domain', domain)

    # Category record, space separated. Fields by position:
    # 0 temporal scale code, 1 perceptual flag, 2 psychoacoustic-model flag,
    # 3 complexity code, 4 dimensions, 5 application-domain code.
    word = cat[i].split(' ')

    _add_feature_property(subject, 'temporalscale', _TEMPORAL_SCALES[word[0]])
    _add_feature_property(
        subject, 'level', 'perceptual' if word[1] == 'Y' else 'physical')

    # The model property is only recorded when the flag is set.
    if word[2] == 'Y':
        _add_feature_property(subject, 'model', 'psychoacoustic')

    _add_feature_property(subject, 'complexity', _COMPLEXITIES[word[3]])

    # 'V' is stored as 'parameterized'; any other value is stored verbatim.
    _add_feature_property(
        subject, 'dimensions', 'parameterized' if word[4] == 'V' else word[4])

    app_domain = _APP_DOMAINS[word[5]]
    if app_domain:
        _add_feature_property(subject, 'appdomain', app_domain)

    # One ``computation`` triple per step key in the feature's signature.
    for key in sig[i].split(' '):
        _add_feature_property(subject, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add_feature_property(subject, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add_feature_property(subject, 'abbreviation', abbr[name])
188
189
# Write the finished graph to disk. The format is named explicitly: this
# call used to rely on the library default, which was RDF/XML in older
# rdflib releases and became Turtle in rdflib 6, so pinning it keeps the
# output unchanged across versions.
# NOTE(review): the destination is a hard-coded, machine-specific absolute
# path - confirm it exists wherever this script is run.
graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf', format='xml')