annotate pdfextract/writeBase.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children 8bd8453e0551
rev   line source
nothing@1 1 import rdflib, os, fnmatch, urllib2
nothing@1 2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
nothing@1 3 from xml.dom.minidom import parseString
nothing@1 4
def _read_stripped_lines(path):
    """Return every line of the text file at *path*, whitespace-stripped.

    Blank lines are kept (as empty strings) so that line numbers stay
    aligned between files that are indexed in parallel.  The file is
    closed before returning.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


# Index-aligned tables: the main loop reads cat[i] and sig[i] for names[i].
names = _read_stripped_lines('pdfextract/names.txt')
cat = _read_stripped_lines('pdfextract/categories.txt')
sig = _read_stripped_lines('pdfextract/sig.txt')
nothing@1 8
# Base URI that every feature resource and property URI is built from.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# Graph that collects all generated triples, with its namespace prefixes bound.
graph = Graph()
for _prefix, _namespace in (
    ('local', local),
    ('dc', 'http://purl.org/dc/elements/1.1/'),
    ('owl', 'http://www.w3.org/2002/07/owl#'),
):
    graph.bind(_prefix, URIRef(_namespace))

# Row index into the cat/sig tables for the feature being processed.
i = 0
nothing@1 17
# One row per domain: (feature name that opens the domain, domain label).
# Rows are listed in the sequence in which the main loop matches them.
_DOMAIN_STARTS = (
    ("Zero Crossing Rate", 'temporal'),
    ("Linear Predictive Coding", 'frequency'),
    ("Mel-scale Frequency Cepstral Coefficients", 'cepstral'),
    ("Auditory Filter Bank Temporal Envelopes", 'modulation frequency'),
    ("Rate-scale-frequency Features", 'eigendomain'),
    ("Phase Space Features", 'phase space'),
)

# Feature names that mark the start of a new domain, in matching order.
order = [_first for _first, _label in _DOMAIN_STARTS]

# Domain label keyed by the feature name that opens it.
domains = dict(_DOMAIN_STARTS)
nothing@1 35
# (abbreviation, full feature name) rows; inverted below into the lookup
# the main loop uses, which is keyed by the full feature name.
_ABBREVIATIONS = (
    ("ZCR", "Zero Crossing Rate"),
    ("MFCC", "Mel-scale Frequency Cepstral Coefficients"),
    ("LPC", "Linear Predictive Coding"),
    ("LPCC", "Linear Prediction Cepstral Coefficients"),
    ("ZCPA", "Zero crossing peak amplitudes"),
    ("LSF", "Line spectral frequencies"),
    ("STE", "Short-time energy"),
    ("AD", "Amplitude descriptor"),
    ("ATFT", "Adaptive time frequency transform"),
    ("DWCH", "Daubechies Wavelet coefficient histogram"),
    ("SF", "Spectral Flux"),
    ("GDF", "Group delay function"),
    ("MGDF", "Modified group delay function"),
    ("SC", "Spectral centroid"),
    ("SSF", "Subband spectral flux"),
    ("PLP", "Perceptual linear prediction"),
)

# Abbreviation keyed by the exact feature name as it appears in names.txt.
abbr = {full_name: short for short, full_name in _ABBREVIATIONS}
nothing@1 54
nothing@1 55
# Domain label currently in effect, and the index into `order` of the next
# feature name that will switch it.  Both are updated by the main loop.
domain = ""
domainIndex = 0

# Maps the first character of each table line (the code used in sig.txt)
# to the text that follows it from the third character onwards.
compdict = {}

for filename in ['filters', 'trans', 'aggr']:
    # `with` closes each table as soon as it has been read; the original
    # left all three handles open.
    with open('pdfextract/' + filename + '.txt') as _table:
        for line in _table:
            line = line.strip()
            if not line:
                # A blank line has no code character; indexing it would
                # raise IndexError, so skip it.
                continue
            compdict[line[0]] = line[2:]
nothing@1 63
nothing@1 64
nothing@1 65
# Lookup tables for the codes found in each categories.txt row.  They are
# built once here because they are identical for every feature.
_TEMPORAL_SCALES = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global',
}
_COMPLEXITIES = {
    'L': 'low',
    'M': 'medium',
    'H': 'high',
}
_APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': '',  # empty value: no appdomain triple is emitted for this code
}


def _add_property(subject, prop, value):
    """Add the triple (subject, <local + prop>, Literal(value)) to `graph`."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


# Emit one OWL class per feature name, annotated from the matching rows of
# `cat` (category codes) and `sig` (computation step codes).
for i, name in enumerate(names):
    # Resource identifier: the feature name with spaces and hyphens removed.
    compact = name.replace(' ', '').replace('-', '')
    subject = URIRef(local + compact)

    # Switch to the next domain when its opening feature is reached.  The
    # bounds check matters: once the last entry of `order` has been matched,
    # indexing `order[domainIndex]` would raise IndexError for every
    # following feature.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[name]
        domainIndex += 1

    graph.add((subject, RDF.type, OWL.Class))
    _add_property(subject, 'feature', compact)
    _add_property(subject, 'domain', domain)

    # Category row layout, by position: temporal scale, perceptual flag,
    # psychoacoustic-model flag, complexity, dimensions, application domain.
    # split() without an argument tolerates repeated spaces, which
    # split(' ') would turn into empty fields that shift every later column.
    word = cat[i].split()

    _add_property(subject, 'temporalscale', _TEMPORAL_SCALES[word[0]])
    _add_property(subject, 'level',
                  'perceptual' if word[1] == 'Y' else 'physical')

    if word[2] == 'Y':
        _add_property(subject, 'model', 'psychoacoustic')

    _add_property(subject, 'complexity', _COMPLEXITIES[word[3]])

    # 'V' marks a variable dimension count; any other value is stored as is.
    _add_property(subject, 'dimensions',
                  'parameterized' if word[4] == 'V' else word[4])

    application = _APP_DOMAINS[word[5]]
    if application:
        _add_property(subject, 'appdomain', application)

    # One 'computation' triple per step code.  An empty signature line now
    # yields no steps instead of a KeyError on the empty string.
    for key in sig[i].split():
        _add_property(subject, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add_property(subject, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add_property(subject, 'abbreviation', abbr[name])
nothing@1 188
nothing@1 189
# Write the finished graph as RDF/XML.  The format is named explicitly
# because the default of Graph.serialize() is not the same in every rdflib
# release (newer releases default to Turtle), and the target is a .rdf file.
graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf', format='xml')