diff rdfpy/writeBaseOnto.py @ 0:62d2c72e4223

initial commit
author nothing@tehis.net
date Mon, 25 Feb 2013 14:40:54 +0000
parents
children 53069717108c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rdfpy/writeBaseOnto.py	Mon Feb 25 14:40:54 2013 +0000
@@ -0,0 +1,609 @@
+import rdflib, os, fnmatch, urllib2
+from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL, XSD, Namespace
+from xml.dom.minidom import parseString
+
+names = [line.strip() for line in open('pdfextract/names.txt')]
+cat = [line.strip() for line in open('pdfextract/categories.txt')]
+sig = [line.strip() for line in open('pdfextract/sig.txt')]
+
+basedir = '/Users/alo/MusicOntology/features/'
+
+local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'
+
+DC = Namespace(u"http://purl.org/dc/elements/1.1/")
+VS = Namespace(u"http://www.w3.org/2003/06/sw-vocab-status/ns#")
+
+graph = Graph()
+graph.bind('af', URIRef(local))
+graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
+graph.bind('owl', URIRef('http://www.w3.org/2002/07/owl#'))
+graph.bind('xsd', URIRef('http://www.w3.org/2001/XMLSchema#'))
+graph.bind('vs', URIRef('http://www.w3.org/2003/06/sw-vocab-status/ns#'))
+
+graph.add((
+    URIRef(''),
+    RDF.type,
+    OWL.Ontology
+))
+
+graph.add((
+    URIRef(''),
+    DC['title'],
+    Literal("Audio Features Base Ontology")
+))
+
+graph.add((
+    URIRef(''),
+    OWL.versionInfo,
+    Literal("Version 0.1")
+))
+
+graph.add((
+    URIRef(''),
+    DC['description'],
+    Literal("This is a base ontology for the Audio Features engineering process collected from literature")
+))
+
+graph.add((
+    VS['term_status'],
+    RDF.type,
+    OWL.AnnotationProperty
+))
+
+i = 0
+
+order = [
+    "Zero Crossing Rate", 
+    "Linear Predictive Coding", 
+    "Mel-scale Frequency Cepstral Coefficients", 
+    "Auditory Filter Bank Temporal Envelopes",
+    "Rate-scale-frequency Features",
+    "Phase Space Features"
+]
+
+domains = {
+    "Zero Crossing Rate": 'temporal',
+    "Linear Predictive Coding": 'frequency',
+    "Mel-scale Frequency Cepstral Coefficients": 'cepstral',
+    "Auditory Filter Bank Temporal Envelopes": 'modulation frequency',
+    "Rate-scale-frequency Features": 'eigendomain',
+    "Phase Space Features": 'phase space'
+}
+
+abbr = {
+	"Zero Crossing Rate": "ZCR",
+	"Mel-scale Frequency Cepstral Coefficients": "MFCC",
+    "Linear Predictive Coding": "LPC",
+    "Linear Prediction Cepstral Coefficients": "LPCC",
+    "Zero crossing peak amplitudes": "ZCPA",
+    "Line spectral frequencies": "LSF",
+    "Short-time energy": "STE",
+    "Amplitude descriptor":  "AD",
+    "Adaptive time frequency transform": "ATFT",
+    "Daubechies Wavelet coefficient histogram": "DWCH",
+    "Spectral Flux": "SF",
+    "Group delay function": "GDF",
+    "Modified group delay function": "MGDF",
+    "Spectral centroid": "SC",
+    "Subband spectral flux": "SSF",
+    "Perceptual linear prediction": "PLP"
+}
+
+appdom = {
+    'ASR': "Speech Recognition",
+    'ESR': "Environmental Sound Recognition",
+    'MIR': "Music Information Retrieval",
+    'AS': "Audio Segmentation",
+    'FP': "Fingerprinting",
+    'VAR': "Several",
+    'EXC': ""
+}
+
+domain = ""
+domainIndex = 0
+compdict = {}
+
+graph.add((
+    URIRef(local + 'MathematicalOperation'),
+    RDF.type,
+    OWL.Class
+))
+
+graph.add((
+    URIRef(local + 'Filter'),
+    RDF.type,
+    OWL.Class
+))
+graph.add((
+    URIRef(local + 'Filter'),
+    RDFS.subClassOf,
+    URIRef(local + 'MathematicalOperation')
+))
+
+graph.add((
+    URIRef(local + 'Transformation'),
+    RDF.type,
+    OWL.Class
+))
+graph.add((
+    URIRef(local + 'Transformation'),
+    RDFS.subClassOf,
+    URIRef(local + 'MathematicalOperation')
+))
+graph.add((
+    URIRef(local + 'Aggregation'),
+    RDF.type,
+    OWL.Class
+))
+graph.add((
+    URIRef(local + 'Aggregation'),
+    RDFS.subClassOf,
+    URIRef(local + 'MathematicalOperation')
+))
+
+for filename in ['filters', 'trans', 'aggr']:
+    compsuper = filename.replace('filters', 'Filter').replace('trans', 'Transformation').replace('aggr', 'Aggregation')
+    for line in [line.strip() for line in open(basedir + 'pdfextract/' + filename + '.txt')]:
+        compname = line[2:]
+        compidref = URIRef(local + compname.replace(' ', '').replace('(', '').replace(')', '').replace('-', '').replace(',', ''))
+        graph.add((
+            compidref,
+            RDF.type,
+            OWL.Class
+        ))
+        graph.add((
+            compidref,
+            RDFS.subClassOf,
+            URIRef(local + compsuper)
+        ))
+        graph.add((
+            compidref,
+            RDFS.label,
+            Literal(compname)
+        ))
+        compdict[line[0]] = compidref
+
+graph.add((
+    URIRef(local + 'Signal'),
+    RDF.type, 
+    OWL.Class 
+))
+
+graph.add((
+    URIRef(local + 'Feature'),
+    RDF.type, 
+    OWL.Class 
+))
+
+graph.add((
+    URIRef(local + 'Feature'),
+    OWL.subClassOf, 
+    URIRef(local + 'Signal'),
+))
+
+for dom in domains.values():
+    idref = URIRef(local + dom.capitalize().replace(' ', '') + 'Feature')
+    graph.add((
+        idref,
+        RDF.type, 
+        OWL.Class  
+    ))
+    graph.add((
+        idref,
+        RDFS.subClassOf, 
+        URIRef(local + 'Feature')
+    ))
+
+graph.add((
+    URIRef(local + 'PerceptualFeature'),
+    RDF.type, 
+    OWL.Class
+))
+graph.add((
+    URIRef(local + 'PerceptualFeature'),
+    RDFS.subClassOf, 
+    URIRef(local + 'Feature')
+))
+
+graph.add((
+    URIRef(local + 'FrequencyDomainPerceptualFeature'),
+    RDF.type, 
+    OWL.Class
+))
+graph.add((
+    URIRef(local + 'FrequencyDomainPerceptualFeature'),
+    RDFS.subClassOf, 
+    URIRef(local + 'FrequencyFeature')
+))
+graph.add((
+    URIRef(local + 'FrequencyDomainPerceptualFeature'),
+    OWL.equivalentClass, 
+    URIRef(local + 'PerceptualFeature')
+))
+
+graph.add((
+    URIRef(local + 'FrequencyDomainPhysicalFeature'),
+    RDF.type, 
+    OWL.Class
+))
+graph.add((
+    URIRef(local + 'FrequencyDomainPhysicalFeature'),
+    RDFS.subClassOf, 
+    URIRef(local + 'FrequencyFeature')
+))
+graph.add((
+    URIRef(local + 'FrequencyDomainPhysicalFeature'),
+    OWL.equivalentClass, 
+    URIRef(local + 'PhysicalFeature')
+))
+
+
+
+graph.add((
+    URIRef(local + 'PhysicalFeature'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'PhysicalFeature'),
+    RDFS.subClassOf, 
+    URIRef(local + 'Feature')
+))
+
+graph.add((
+    URIRef(local + 'ParametrizedDimensions'),
+    RDF.type, 
+    OWL.Class  
+))
+
+graph.add((
+    URIRef(local + 'ComputationalComplexity'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'LowComplexity'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'LowComplexity'),
+    RDFS.subClassOf, 
+    URIRef(local + 'ComputationalComplexity')
+))
+graph.add((
+    URIRef(local + 'MediumComplexity'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'MediumComplexity'),
+    RDFS.subClassOf, 
+    URIRef(local + 'ComputationalComplexity')
+))
+graph.add((
+    URIRef(local + 'HighComplexity'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'HighComplexity'),
+    RDFS.subClassOf, 
+    URIRef(local + 'ComputationalComplexity')
+))
+
+graph.add((
+    URIRef(local + 'TemporalScale'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'IntraFrame'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'IntraFrame'),
+    RDFS.subClassOf, 
+    URIRef(local + 'TemporalScale')
+))
+graph.add((
+    URIRef(local + 'InterFrame'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'InterFrame'),
+    RDFS.subClassOf, 
+    URIRef(local + 'TemporalScale')
+))
+graph.add((
+    URIRef(local + 'Global'),
+    RDF.type, 
+    OWL.Class  
+))
+graph.add((
+    URIRef(local + 'Global'),
+    RDFS.subClassOf, 
+    URIRef(local + 'TemporalScale')
+))
+
+
+graph.add((
+    URIRef(local + 'ApplicationDomain'),
+    RDF.type, 
+    OWL.Class  
+))
+
+for key in appdom.keys():
+    if appdom[key] != "":
+        idref = URIRef(local + appdom[key].replace(" ", ""))
+        graph.add((
+            idref,
+            URIRef(RDF.type), 
+            OWL.Class  
+        ))
+        graph.add((
+            idref,
+            RDFS.subClassOf, 
+            URIRef(local + 'ApplicationDomain')
+        ))
+
+#properties
+graph.add((
+    URIRef(local + "application_domain"),
+    RDF.type,
+    RDF.Property
+))
+graph.add((
+    URIRef(local + "application_domain"),
+    RDFS.range,
+    URIRef(local + 'ApplicationDomain')
+))
+graph.add((
+    URIRef(local + "application_domain"),
+    VS['term_status'],
+    Literal("testing")
+))
+graph.add((
+    URIRef(local + "application_domain"),
+    RDFS.comment,
+    Literal("application domain property")
+))
+
+
+
+graph.add((
+    URIRef(local + "semantic_interpretation"),
+    RDF.type,
+    RDF.Property
+))
+graph.add((
+    URIRef(local + "semantic_interpretation"),
+    VS['term_status'],
+    Literal("testing")
+))
+    
+graph.add((
+    URIRef(local + "computational_complexity"),
+    RDF.type,
+    RDF.Property
+))
+graph.add((
+    URIRef(local + "computational_complexity"),
+    VS['term_status'],
+    Literal("testing")
+))
+
+graph.add((
+    URIRef(local + "computational_complexity"),
+    RDFS.range,
+    URIRef(local + 'ComputationalComplexity')
+))
+
+graph.add((
+    URIRef(local + "psychoacoustic_model"),
+    RDF.type,
+    RDF.Property
+))
+graph.add((
+    URIRef(local + "psychoacoustic_model"),
+    RDFS.range,
+    XSD.Boolean
+))
+graph.add((
+    URIRef(local + "psychoacoustic_model"),
+    VS['term_status'],
+    Literal("testing")
+))
+
+
+graph.add((
+    URIRef(local + "dimensions"),
+    RDF.type,
+    RDF.Property
+))
+graph.add((
+    URIRef(local + "dimensions"),
+    RDFS.range,
+    XSD.Integer
+))
+graph.add((
+    URIRef(local + "dimensions"),
+    RDFS.range,
+    URIRef(local + 'ParametrizedDimensions')
+))
+
+graph.add((
+    URIRef(local + "temporal_scale"),
+    RDF.type,
+    RDF.Property
+))
+graph.add((
+    URIRef(local + "temporal_scale"),
+    RDFS.range,
+    URIRef(local + 'TemporalScale')
+))
+            
+for name in names:
+    id = local + (name.replace(' ','').replace('-',''))
+    
+    if name == order[domainIndex]:
+        domain = domains[order[domainIndex]]
+        domainIndex += 1
+    
+    graph.add(( URIRef(id),        
+        URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), 
+        OWL.Class  
+    ))
+    
+    graph.add((
+        URIRef(id),
+        VS['term_status'],
+        Literal("testing")
+    ))
+
+    if domain == "frequency":
+        if word[1] == 'Y': 
+            temp = URIRef(local + 'FrequencyDomainPerceptualFeature')
+        else:
+            temp = URIRef(local + 'FrequencyDomainPhysicalFeature')
+
+        graph.add(( 
+            URIRef(id), 
+            RDFS.subClassOf, 
+            URIRef(temp)
+        ))
+            
+    else:
+        graph.add(( 
+            URIRef(id), 
+            RDFS.subClassOf, 
+            URIRef(local + domain.capitalize().replace(' ', '') + 'Feature')
+        ))
+    
+    graph.add(( 
+        URIRef(id), 
+        #URIRef(local + 'feature'), 
+        RDFS.label,
+        Literal(name.replace(' ','').replace('-','')) 
+    ))
+
+    graph.add(( 
+        URIRef(id), 
+        RDFS.comment,
+        Literal(name + " feature") 
+    ))
+
+    graph.add(( 
+        URIRef(id), 
+        RDFS.label, 
+        Literal(name) 
+    ))
+
+    word = cat[i].split(' ')
+
+    temp = {
+        'I': URIRef(local+'IntraFrame'),
+        'X': URIRef(local+'InterFrame'),
+        'G': URIRef(local+'Global')
+    }[word[0]]
+
+    graph.add(( 
+        URIRef(id), 
+        URIRef(local + 'temporal_scale'), 
+        temp 
+    ))
+    
+    
+    if word[1] == 'Y': 
+        temp = URIRef(local + 'PerceptualFeature')
+    else:
+        temp = URIRef(local + 'PhysicalFeature')
+        
+    graph.add((
+        URIRef(id), 
+        URIRef(local + "semantic_interpretation"), 
+        temp
+    ))
+        
+    if word[2] == 'Y':
+        graph.add((
+            URIRef(id), 
+            URIRef(local + "psychoacoustic_model"), 
+            Literal(True) 
+        ))
+    else:
+        graph.add((
+            URIRef(id), 
+            URIRef(local + "psychoacoustic_model"), 
+            Literal(False) 
+        ))
+        
+    temp = {
+        'L': URIRef(local + 'LowComplexity'),
+        'M': URIRef(local + 'MediumComplexity'),
+        'H': URIRef(local + 'HighComplexity')
+    }[word[3]]
+    
+    graph.add(( 
+        URIRef(id),
+        URIRef(local + "computational_complexity"), 
+        temp 
+    ))
+    
+    if word[4] == 'V':
+        temp = URIRef(local + 'ParametrizedDimensions')
+    else:
+        temp = Literal(int(word[4]))
+        
+    graph.add(( 
+        URIRef(id),
+        URIRef(local + 'dimensions'), 
+        temp
+    ))
+    
+    temp = appdom[word[5]]
+
+    if temp != '':   
+        graph.add(( 
+            URIRef(id), 
+            URIRef(local + "application_domain"),            
+            URIRef(local + temp.replace(" ", "")) 
+        ))            
+        
+    steps = sig[i].split(' ')
+    
+    for key in steps:
+        graph.add(( 
+            URIRef(id), 
+            URIRef(local + 'computation'), 
+            compdict[key] 
+        ))
+        
+    if name.find('MPEG-7') >= 0:
+        graph.add(( 
+            URIRef(id),
+            URIRef(local + 'computedIn'),
+            Literal('MPEG-7')
+        ))
+        #graph.add(( 
+        #    URIRef(local+name.replace('MPEG-7', '').lower().lstrip().replace(' ', '_')+'_feature'),
+        #    RDF.type,
+        #    URIRef(id)
+        #))
+    
+    if name in abbr.keys():
+        graph.add(( 
+            URIRef(id),
+            URIRef(local + 'abbreviation'),
+            Literal(abbr[name])
+        ))
+        
+    
+    i += 1
+
+
+
+graph.serialize('/Users/alo/MusicOntology/features/baseOnto.n3', format='n3')
+graph.serialize('/Users/alo/MusicOntology/features/baseOnto.rdf')