Mercurial > hg > audio-features-catalogue
comparison pdfextract/writeBase.py @ 1:365a37a2fb6c
added files from pdfextract directory
author | nothing@tehis.net |
---|---|
date | Mon, 25 Feb 2013 14:47:41 +0000 |
parents | |
children | 8bd8453e0551 |
comparison
equal
deleted
inserted
replaced
0:62d2c72e4223 | 1:365a37a2fb6c |
---|---|
1 import rdflib, os, fnmatch, urllib2 | |
2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL | |
3 from xml.dom.minidom import parseString | |
4 | |
def _read_stripped_lines(path):
    """Return every line of the text file at *path* with whitespace stripped.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the lines have been read, instead of being left for the garbage
    collector.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


# Three parallel, index-aligned inputs: row k of `cat` and `sig` is looked up
# for entry k of `names` further down in this script.
names = _read_stripped_lines('pdfextract/names.txt')
cat = _read_stripped_lines('pdfextract/categories.txt')
sig = _read_stripped_lines('pdfextract/sig.txt')

# Base namespace used for every feature URI and property URI built below.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# Output graph, with the prefixes that should appear in the serialization.
graph = Graph()
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
graph.bind('owl', URIRef('http://www.w3.org/2002/07/owl#'))
15 | |
# Row counter into the parallel `cat` / `sig` lists.
i = 0

# Feature names that switch the current domain, in the order they are
# expected to be encountered, each paired with the domain label that applies
# from that feature onwards.
_domain_starts = (
    ("Zero Crossing Rate", 'temporal'),
    ("Linear Predictive Coding", 'frequency'),
    ("Mel-scale Frequency Cepstral Coefficients", 'cepstral'),
    ("Auditory Filter Bank Temporal Envelopes", 'modulation frequency'),
    ("Rate-scale-frequency Features", 'eigendomain'),
    ("Phase Space Features", 'phase space'),
)

# `order` keeps the marker names as a list; `domains` maps marker -> label.
order = [pair[0] for pair in _domain_starts]
domains = dict(_domain_starts)

# Abbreviations keyed by the exact feature name (lookup is case-sensitive).
abbr = {
    "Adaptive time frequency transform": "ATFT",
    "Amplitude descriptor": "AD",
    "Daubechies Wavelet coefficient histogram": "DWCH",
    "Group delay function": "GDF",
    "Line spectral frequencies": "LSF",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Linear Predictive Coding": "LPC",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Modified group delay function": "MGDF",
    "Perceptual linear prediction": "PLP",
    "Short-time energy": "STE",
    "Spectral Flux": "SF",
    "Spectral centroid": "SC",
    "Subband spectral flux": "SSF",
    "Zero Crossing Rate": "ZCR",
    "Zero crossing peak amplitudes": "ZCPA",
}

# Running state: the domain label currently in effect and the position of
# the next expected marker in `order`.
domain, domainIndex = "", 0

# One-character computation-step code -> description.
compdict = dict()
59 | |
# Fill `compdict` from the three step-definition files. Each non-blank line
# contributes one entry: its first character is the key and everything from
# the third character onwards is the value (the second character is dropped).
for filename in ['filters', 'trans', 'aggr']:
    # `with` closes each file once it has been read instead of leaking it.
    with open('pdfextract/' + filename + '.txt') as handle:
        for raw in handle:
            entry = raw.strip()
            if not entry:
                # A blank line has no key character; indexing it would
                # raise IndexError, so skip it.
                continue
            compdict[entry[0]] = entry[2:]
63 | |
64 | |
65 | |
# Code -> label tables for the space-separated columns of a category row.
# Built once here rather than on every loop iteration.
_temporal_scales = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global'
}
_complexities = {
    'L': 'low',
    'M': 'medium',
    'H': 'high'
}
_app_domains = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': ''   # mapped to the empty string, which suppresses the triple
}


def _add_literal(subject, prop, value):
    """Add the triple (subject, <local + prop>, Literal(value)) to `graph`."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


# Emit one OWL class per feature name, annotated from the matching rows of
# `cat` (category codes) and `sig` (computation-step codes).
for index, name in enumerate(names):
    # Local part of the URI: the feature name without spaces and hyphens.
    compact = name.replace(' ', '').replace('-', '')
    subject = URIRef(local + compact)

    # Switch to the next domain when its marker feature is reached. The
    # bounds check keeps names that follow the last marker from indexing
    # past the end of `order`.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[name]
        domainIndex += 1

    graph.add((subject, RDF.type, OWL.Class))
    _add_literal(subject, 'feature', compact)
    _add_literal(subject, 'domain', domain)

    # Category row columns: 0 temporal scale, 1 perceptual flag,
    # 2 psychoacoustic-model flag, 3 complexity, 4 dimensions,
    # 5 application domain. Unknown codes raise KeyError.
    word = cat[index].split(' ')

    _add_literal(subject, 'temporalscale', _temporal_scales[word[0]])
    _add_literal(subject, 'level',
                 'perceptual' if word[1] == 'Y' else 'physical')

    if word[2] == 'Y':
        _add_literal(subject, 'model', 'psychoacoustic')

    _add_literal(subject, 'complexity', _complexities[word[3]])

    # 'V' means the dimensionality is parameterized; any other value is
    # stored as given.
    _add_literal(subject, 'dimensions',
                 'parameterized' if word[4] == 'V' else word[4])

    application = _app_domains[word[5]]
    if application != '':
        _add_literal(subject, 'appdomain', application)

    # One 'computation' triple per step code in this feature's signal row.
    for key in sig[index].split(' '):
        _add_literal(subject, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add_literal(subject, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add_literal(subject, 'abbreviation', abbr[name])


graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf')