diff pdfextract/writeBase.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children 8bd8453e0551
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pdfextract/writeBase.py	Mon Feb 25 14:47:41 2013 +0000
@@ -0,0 +1,180 @@
+"""Write the base RDF graph for the features named in pdfextract/names.txt.
+
+Inputs, all opened relative to the current working directory:
+
+* pdfextract/names.txt -- one feature name per line.
+* pdfextract/categories.txt -- line N classifies feature N with six
+  space-separated fields: temporal scale (I/X/G), perceptual level ('Y' or
+  anything else), psychoacoustic model ('Y' or anything else), complexity
+  (L/M/H), dimensions ('V' or a literal value) and application domain code.
+* pdfextract/sig.txt -- line N holds the space-separated computation step
+  keys of feature N.
+* pdfextract/filters.txt, trans.txt and aggr.txt -- one computation step
+  per line: the first character is the key and everything from the third
+  character on is the description stored for it.
+
+The graph is serialized to /Users/alo/MusicOntology/features/rdf/base.rdf.
+"""
+import rdflib, os, fnmatch, urllib2
+from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
+from xml.dom.minidom import parseString
+
+
+def _read_lines(path):
+    """Return the lines of the text file at path, each one stripped."""
+    # The context manager closes the handle as soon as the list is built
+    # instead of leaving it open until garbage collection.
+    with open(path) as handle:
+        return [line.strip() for line in handle]
+
+
+names = _read_lines('pdfextract/names.txt')
+cat = _read_lines('pdfextract/categories.txt')
+sig = _read_lines('pdfextract/sig.txt')
+
+# Base URI for the feature resources and for the properties describing them.
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+
+graph = Graph()
+graph.bind('local', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+graph.bind('owl', URIRef('http://www.w3.org/2002/07/owl#'))
+
+# Feature names that open a new domain, in the order they must appear in
+# names.txt.  A feature inherits the domain of the most recent such name.
+order = [
+    "Zero Crossing Rate",
+    "Linear Predictive Coding",
+    "Mel-scale Frequency Cepstral Coefficients",
+    "Auditory Filter Bank Temporal Envelopes",
+    "Rate-scale-frequency Features",
+    "Phase Space Features",
+]
+
+# Domain label that starts at each of the names listed in ``order``.
+domains = {
+    "Zero Crossing Rate": 'temporal',
+    "Linear Predictive Coding": 'frequency',
+    "Mel-scale Frequency Cepstral Coefficients": 'cepstral',
+    "Auditory Filter Bank Temporal Envelopes": 'modulation frequency',
+    "Rate-scale-frequency Features": 'eigendomain',
+    "Phase Space Features": 'phase space',
+}
+
+# Abbreviations, keyed by the exact feature name as written in names.txt.
+abbr = {
+    "Zero Crossing Rate": "ZCR",
+    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
+    "Linear Predictive Coding": "LPC",
+    "Linear Prediction Cepstral Coefficients": "LPCC",
+    "Zero crossing peak amplitudes": "ZCPA",
+    "Line spectral frequencies": "LSF",
+    "Short-time energy": "STE",
+    "Amplitude descriptor": "AD",
+    "Adaptive time frequency transform": "ATFT",
+    "Daubechies Wavelet coefficient histogram": "DWCH",
+    "Spectral Flux": "SF",
+    "Group delay function": "GDF",
+    "Modified group delay function": "MGDF",
+    "Spectral centroid": "SC",
+    "Subband spectral flux": "SSF",
+    "Perceptual linear prediction": "PLP",
+}
+
+# Decoders for the codes found in the fields of a categories.txt line.
+temporalscales = {
+    'I': 'intraframe',
+    'X': 'interframe',
+    'G': 'global',
+}
+
+complexities = {
+    'L': 'low',
+    'M': 'medium',
+    'H': 'high',
+}
+
+# 'EXC' decodes to the empty string, which suppresses the appdomain triple.
+appdomains = {
+    'ASR': "speech recognition",
+    'ESR': "environmental sound recognition",
+    'MIR': "music information retrieval",
+    'AS': "audio segmentation",
+    'FP': "fingerprinting",
+    'VAR': "several",
+    'EXC': '',
+}
+
+# Computation step key -> description, merged from the three step files.
+# A key that occurs more than once keeps the description read last.
+compdict = {}
+
+for filename in ['filters', 'trans', 'aggr']:
+    for line in _read_lines('pdfextract/' + filename + '.txt'):
+        if not line:
+            # A blank line has no key; indexing it would raise IndexError.
+            continue
+        compdict[line[0]] = line[2:]
+
+
+def _add_property(subject, prop, value):
+    """Add the triple (subject, <local + prop>, Literal(value)) to graph."""
+    graph.add((subject, URIRef(local + prop), Literal(value)))
+
+
+# Domain of the feature being processed and the position in ``order`` of
+# the next name that switches it.
+domain = ""
+domainIndex = 0
+
+for i, name in enumerate(names):
+    # The URI fragment is the feature name without spaces and hyphens.
+    compact = name.replace(' ', '').replace('-', '')
+    feature = URIRef(local + compact)
+
+    # Once every entry of ``order`` has been consumed, domainIndex equals
+    # len(order); the bounds check keeps later features in the last domain
+    # instead of raising IndexError.
+    if domainIndex < len(order) and name == order[domainIndex]:
+        domain = domains[order[domainIndex]]
+        domainIndex += 1
+
+    graph.add((feature, RDF.type, OWL.Class))
+    _add_property(feature, 'feature', compact)
+    _add_property(feature, 'domain', domain)
+
+    # cat and sig are read in parallel with names: line i describes feature i.
+    word = cat[i].split(' ')
+
+    _add_property(feature, 'temporalscale', temporalscales[word[0]])
+    _add_property(feature, 'level',
+                  'perceptual' if word[1] == 'Y' else 'physical')
+
+    if word[2] == 'Y':
+        _add_property(feature, 'model', 'psychoacoustic')
+
+    _add_property(feature, 'complexity', complexities[word[3]])
+
+    # 'V' is stored as 'parameterized'; any other value is stored
+    # verbatim.
+    _add_property(feature, 'dimensions',
+                  'parameterized' if word[4] == 'V' else word[4])
+
+    appdomain = appdomains[word[5]]
+    if appdomain != '':
+        _add_property(feature, 'appdomain', appdomain)
+
+    # One computation triple per step key listed for this feature.
+    for key in sig[i].split(' '):
+        _add_property(feature, 'computation', compdict[key])
+
+    if 'MPEG-7' in name:
+        _add_property(feature, 'computedIn', 'MPEG-7')
+
+    if name in abbr:
+        _add_property(feature, 'abbreviation', abbr[name])
+
+
+# NOTE(review): the destination is an absolute path on the original author's
+# machine; adjust it before running the script anywhere else.
+graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf')
\ No newline at end of file