annotate pdfextract/writeBase.py @ 18:d5012016bf64 tip

added rdfpy and rdfonto directories
author nothing@tehis.net
date Tue, 23 Apr 2013 11:49:20 +0100
parents 8bd8453e0551
children
rev   line source
import fnmatch
import os
import urllib2
from xml.dom.minidom import parseString

import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL, Namespace
nothing@1 4
# Build an RDF graph describing audio features from the plain-text tables in
# pdfextract/ and serialize it to base.rdf.
#
# Inputs (one record per line, aligned by line number):
#   names.txt       feature names
#   categories.txt  six space-separated category codes per feature
#   sig.txt         space-separated computation-step keys per feature
#   filters.txt, trans.txt, aggr.txt
#                   "<key> <description>" lines: the first character is the
#                   key, everything from the third character on is the text


def _read_lines(path):
    """Return the lines of *path* with surrounding whitespace stripped.

    The file is closed before returning.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


names = _read_lines('pdfextract/names.txt')
cat = _read_lines('pdfextract/categories.txt')
sig = _read_lines('pdfextract/sig.txt')

# NOTE(review): the original script used `local` as the URI prefix for every
# subject and property but never defined it (NameError on first use). It is
# assumed here to be the same base URI as the `af` namespace -- confirm.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

graph = Graph()

af = Namespace(local)
graph.bind('af', af)

dc = Namespace('http://purl.org/dc/elements/1.1/')
graph.bind('dc', dc)

owl = Namespace('http://www.w3.org/2002/07/owl#')
graph.bind('owl', owl)

# Feature names that open a new domain section in names.txt, in the order
# they are expected to appear.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features"
]

# Domain label that applies from each section-opening feature onwards.
domains = {
    "Zero Crossing Rate": 'temporal',
    "Linear Predictive Coding": 'frequency',
    "Mel-scale Frequency Cepstral Coefficients": 'cepstral',
    "Auditory Filter Bank Temporal Envelopes": 'modulation frequency',
    "Rate-scale-frequency Features": 'eigendomain',
    "Phase Space Features": 'phase space'
}

# Abbreviations attached to the features that have one.
abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero crossing peak amplitudes": "ZCPA",
    "Line spectral frequencies": "LSF",
    "Short-time energy": "STE",
    "Amplitude descriptor": "AD",
    "Adaptive time frequency transform": "ATFT",
    "Daubechies Wavelet coefficient histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group delay function": "GDF",
    "Modified group delay function": "MGDF",
    "Spectral centroid": "SC",
    "Subband spectral flux": "SSF",
    "Perceptual linear prediction": "PLP"
}

# Decoding tables for the columns of categories.txt. They are constant, so
# they are built once here instead of on every loop iteration.
_TEMPORAL_SCALES = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global'
}

_COMPLEXITIES = {
    'L': 'low',
    'M': 'medium',
    'H': 'high'
}

_APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': ''  # empty string: no appdomain triple is written
}

domain = ""
domainIndex = 0
compdict = {}

for filename in ['filters', 'trans', 'aggr']:
    for line in _read_lines('pdfextract/' + filename + '.txt'):
        if not line:
            # A blank line has no key character; skip it rather than fail
            # with IndexError on line[0].
            continue
        compdict[line[0]] = line[2:]


def _add_literal(subject, prop, value):
    """Add the triple (subject, <local + prop>, Literal(value)) to `graph`."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


# The three tables are joined by line number; fail early with a clear message
# instead of an IndexError part-way through the loop.
if len(cat) < len(names) or len(sig) < len(names):
    raise ValueError(
        'categories.txt (%d lines) and sig.txt (%d lines) must each have at '
        'least as many lines as names.txt (%d lines)'
        % (len(cat), len(sig), len(names)))

for name, cat_line, sig_line in zip(names, cat, sig):
    compact_name = name.replace(' ', '').replace('-', '')
    feature = URIRef(local + compact_name)

    # Switch to the next domain when its opening feature is reached. The
    # bounds check keeps names that follow the last entry of `order` from
    # indexing past the end of the list.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    graph.add((feature, RDF.type, OWL.Class))
    _add_literal(feature, 'feature', compact_name)
    _add_literal(feature, 'domain', domain)

    word = cat_line.split(' ')

    # Column 0: temporal scale code.
    _add_literal(feature, 'temporalscale', _TEMPORAL_SCALES[word[0]])

    # Column 1: 'Y' means perceptual, anything else physical.
    _add_literal(feature, 'level',
                 'perceptual' if word[1] == 'Y' else 'physical')

    # Column 2: 'Y' marks a psychoacoustic model.
    if word[2] == 'Y':
        _add_literal(feature, 'model', 'psychoacoustic')

    # Column 3: complexity code.
    _add_literal(feature, 'complexity', _COMPLEXITIES[word[3]])

    # Column 4: either 'V' (parameterized) or an integer dimension count.
    if word[4] == 'V':
        dimensions = 'parameterized'
        dim = 'multi-dimensional'
    else:
        dimensions = word[4]
        if int(dimensions) == 1:
            dim = 'one-dimensional'
        else:
            dim = 'multi-dimensional'

    _add_literal(feature, 'dimensions', dimensions)
    _add_literal(feature, 'dimensionality', dim)

    # Column 5: application domain code.
    app_domain = _APP_DOMAINS[word[5]]
    if app_domain != '':
        _add_literal(feature, 'appdomain', app_domain)

    # One 'computation' triple per step key listed for this feature.
    for key in sig_line.split(' '):
        _add_literal(feature, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add_literal(feature, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add_literal(feature, 'abbreviation', abbr[name])


graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf')