# view rdfpy/writeBaseOnto.py @ 15:53069717108c
#
# fuxi base literal update
# author nothing@tehis.net
# date Sun, 14 Apr 2013 17:54:23 +0100
# parents 62d2c72e4223
# children d5012016bf64
# line wrap: on
# line source
import rdflib, os, fnmatch, urllib2
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL, XSD, Namespace, BNode
from InfixOwl.InfixOwl import *
from xml.dom.minidom import parseString

def addProperty(graph, propID, type, range, domain, label, comment):
    """Declare a property in the local namespace and annotate it.

    The property URI is ``local + propID``.  Six triples are added to
    ``graph``: its rdf:type, rdfs:range, rdfs:domain, a vs:term_status of
    "testing", an rdfs:label and an rdfs:comment.

    Parameters:
        graph   -- graph object whose ``add`` method receives the triples
        propID  -- local name appended to the module-level ``local`` base URI
        type    -- RDF node used as the property's rdf:type
                   (e.g. OWL.ObjectProperty)
        range   -- RDF node used as the rdfs:range
        domain  -- RDF node used as the rdfs:domain
        label   -- plain string stored as the rdfs:label literal
        comment -- plain string stored as the rdfs:comment literal

    The parameter names ``type`` and ``range`` shadow builtins; they are kept
    because callers pass them by keyword.
    """
    propURI = URIRef(local + propID)
    statements = (
        # Honour the caller-supplied kind instead of forcing ObjectProperty.
        (RDF.type, type),
        (RDFS.range, range),
        (RDFS.domain, domain),
        (VS['term_status'], Literal("testing")),
        (RDFS.label, Literal(label)),
        (RDFS.comment, Literal(comment)),
    )
    for predicate, value in statements:
        graph.add((propURI, predicate, value))
    

# Input tables, one stripped entry per line.  The paths are relative to the
# current working directory.  Each file is closed as soon as it has been read.
with open('pdfextract/names.txt') as namesFile:
    names = [line.strip() for line in namesFile]
with open('pdfextract/categories.txt') as categoriesFile:
    cat = [line.strip() for line in categoriesFile]
with open('pdfextract/sigv2.txt') as signaturesFile:
    sig = [line.strip() for line in signaturesFile]


# Running position in the cat/sig tables.
i = 0

# Feature names at which the active signal domain changes, in order.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features",
]

# Signal domain associated with each of the names listed in `order`.
domains = dict(zip(order, [
    'temporal',
    'frequency',
    'cepstral',
    'modulation frequency',
    'eigendomain',
    'phase space',
]))

# Abbreviations keyed by full feature name.
abbr = dict([
    ("Zero Crossing Rate", "ZCR"),
    ("Mel-scale Frequency Cepstral Coefficients", "MFCC"),
    ("Linear Predictive Coding", "LPC"),
    ("Linear Prediction Cepstral Coefficients", "LPCC"),
    ("Zero crossing peak amplitudes", "ZCPA"),
    ("Line spectral frequencies", "LSF"),
    ("Short-time energy", "STE"),
    ("Amplitude descriptor", "AD"),
    ("Adaptive time frequency transform", "ATFT"),
    ("Daubechies Wavelet coefficient histogram", "DWCH"),
    ("Spectral Flux", "SF"),
    ("Group delay function", "GDF"),
    ("Modified group delay function", "MGDF"),
    ("Spectral centroid", "SC"),
    ("Subband spectral flux", "SSF"),
    ("Perceptual linear prediction", "PLP"),
])

# Application-domain codes and their readable names; the empty name for
# 'EXC' means that no application-domain class is created or linked.
appdom = dict([
    ('ASR', "Speech Recognition"),
    ('ESR', "Environmental Sound Recognition"),
    ('MIR', "Music Information Retrieval"),
    ('AS', "Audio Segmentation"),
    ('FP', "Fingerprinting"),
    ('VAR', "Several"),
    ('EXC', ""),
])

# Currently active signal domain and the index of the next entry of `order`.
domain = ''
domainIndex = 0

# Operation key -> class URI, filled while the operation files are read.
compdict = dict()

# Directory prefix used when opening the operation description files.
basedir = '/Users/alo/MusicOntology/features/'

# Base URI prepended to every local term name.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# External vocabularies: Dublin Core elements and vocabulary status.
DC = Namespace(u"http://purl.org/dc/elements/1.1/")
VS = Namespace(u"http://www.w3.org/2003/06/sw-vocab-status/ns#")

# The graph that collects the whole ontology, with its prefix bindings.
graph = Graph()
for nsPrefix, nsURI in (
    ('af', local),
    ('dc', 'http://purl.org/dc/elements/1.1/'),
    ('owl', 'http://www.w3.org/2002/07/owl#'),
    ('xsd', 'http://www.w3.org/2001/XMLSchema#'),
    ('vs', 'http://www.w3.org/2003/06/sw-vocab-status/ns#'),
):
    graph.bind(nsPrefix, URIRef(nsURI))



########################
####### header #########
########################

# The ontology itself is identified by the empty relative URI.
ontologyRef = URIRef('')
for headerPredicate, headerValue in (
    (RDF.type, OWL.Ontology),
    (DC['title'], Literal("Audio Features Base Ontology")),
    (OWL.versionInfo, Literal("Version 0.1")),
    (DC['description'], Literal("This is a base ontology for the Audio Features engineering process collected from literature")),
):
    graph.add((ontologyRef, headerPredicate, headerValue))

# vs:term_status is attached to terms elsewhere; declare it as an annotation.
graph.add((VS['term_status'], RDF.type, OWL.AnnotationProperty))



#############################
####### computation #########
#############################


# MathematicalOperation: the root class for every step of a computation.
mathOperation = URIRef(local + 'MathematicalOperation')
operationProperty = URIRef(local + "operation")

graph.add((mathOperation, RDF.type, OWL.Class))
# The subclass relation is rdfs:subClassOf; the OWL vocabulary has no
# owl:subClassOf term, so the RDFS namespace must be used here.
graph.add((mathOperation, RDFS.subClassOf, OWL.Thing))

# Restriction: exactly one value for the `operation` property.
# NOTE(review): Restriction comes from InfixOwl and is presumably what writes
# the restriction triples for `bnode` into `graph` -- confirm.
bnode = BNode()
Restriction(operationProperty, graph, cardinality=Literal(1, datatype=XSD.int), identifier=bnode)
graph.add((mathOperation, RDFS.subClassOf, bnode))

# Restriction: every `operation` value is itself a MathematicalOperation.
bnode = BNode()
Restriction(operationProperty, graph, allValuesFrom=mathOperation, identifier=bnode)
graph.add((mathOperation, RDFS.subClassOf, bnode))

# The `operation` property: a functional object property whose domain is
# MathematicalOperation.
graph.add((operationProperty, RDF.type, OWL.ObjectProperty))
graph.add((operationProperty, RDF.type, OWL.FunctionalProperty))
graph.add((operationProperty, RDFS.domain, mathOperation))


mathOpClass = URIRef(local + 'MathematicalOperation')

# The three operation families, each a direct subclass of
# MathematicalOperation.
for opFamily in ('Filter', 'Transformation', 'Aggregation'):
    opFamilyRef = URIRef(local + opFamily)
    graph.add((opFamilyRef, RDF.type, OWL.Class))
    graph.add((opFamilyRef, RDFS.subClassOf, mathOpClass))

# A Computation points at its first operation through operation_sequence;
# the operations are chained to each other through next_operation.
computationClass = URIRef(local + "Computation")
graph.add((computationClass, RDF.type, OWL.Class))

for linkName, linkDomain in (
    ("operation_sequence", computationClass),
    ("next_operation", mathOpClass),
):
    linkRef = URIRef(local + linkName)
    graph.add((linkRef, RDF.type, OWL.ObjectProperty))
    graph.add((linkRef, RDF.type, OWL.FunctionalProperty))
    graph.add((linkRef, RDFS.domain, linkDomain))
    graph.add((linkRef, RDFS.range, mathOpClass))

# LastOperation: an operation restricted to at most zero next_operation values.
lastOpClass = URIRef(local + 'LastOperation')
graph.add((lastOpClass, RDF.type, OWL.Class))
graph.add((lastOpClass, RDFS.subClassOf, mathOpClass))

bnode = BNode()
Restriction(URIRef(local + "next_operation"), graph, maxCardinality=Literal(0, datatype=XSD.int), identifier=bnode)
graph.add((lastOpClass, RDFS.subClassOf, bnode))

# OptionalOperation: an operation with a minimum cardinality of zero on
# the `operation` property.
optionalOpClass = URIRef(local + 'OptionalOperation')
graph.add((optionalOpClass, RDF.type, OWL.Class))
graph.add((optionalOpClass, RDFS.subClassOf, mathOpClass))

bnode = BNode()
Restriction(URIRef(local + "operation"), graph, minCardinality=Literal(0, datatype=XSD.int), identifier=bnode)
graph.add((optionalOpClass, RDFS.subClassOf, bnode))


# Read the filter and transformation lists.  Each non-empty line is used as
# <key character><one separator character><readable name>: the first
# character becomes the key in `compdict`, the text from index 2 onwards
# becomes the class label.
for filename, compsuper in (('filters', 'Filter'), ('trans', 'Transformation')):
    # `with` closes the file once its lines have been consumed.
    with open(basedir + 'pdfextract/' + filename + '.txt') as compFile:
        for line in compFile:
            line = line.strip()
            if not line:
                # A blank line has no key character; skip it instead of
                # failing on line[0].
                continue
            compname = line[2:]
            # Build the local name by dropping spaces and punctuation.
            localName = compname
            for dropped in (' ', '(', ')', '-', ','):
                localName = localName.replace(dropped, '')
            compidref = URIRef(local + localName)
            graph.add((compidref, RDF.type, OWL.Class))
            graph.add((compidref, RDFS.subClassOf, URIRef(local + compsuper)))
            graph.add((compidref, RDFS.label, Literal(compname)))
            compdict[line[0]] = compidref


# Read the aggregation list.  Each non-empty line is split on tabs: field 0
# is the key stored in `compdict`, field 1 is the readable name.
with open(basedir + 'pdfextract/aggrdet.txt') as aggrFile:
    for line in aggrFile:
        line = line.strip()
        if not line:
            # Nothing to split on a blank line; skip it instead of failing.
            continue
        fields = line.split('\t')
        compname = fields[1]
        # NOTE(review): only spaces are removed here, whereas the
        # filter/transformation loader also drops '(', ')', '-' and ','.
        compidref = URIRef(local + compname.replace(' ', ''))
        graph.add((compidref, RDF.type, OWL.Class))
        graph.add((compidref, RDFS.subClassOf, URIRef(local + "Aggregation")))
        graph.add((compidref, RDFS.label, Literal(compname)))
        compdict[fields[0]] = compidref
    

####################################
####### add feature, signal ########
####################################

# Top-level classes of the feature model.
for topClassName in ("AudioFeature", 'Signal', 'Feature'):
    graph.add((URIRef(local + topClassName), RDF.type, OWL.Class))

# Feature is a kind of Signal.  The relation is rdfs:subClassOf; the OWL
# vocabulary has no owl:subClassOf term.
graph.add((
    URIRef(local + 'Feature'),
    RDFS.subClassOf,
    URIRef(local + 'Signal'),
))



##################################
####### feature hierarchy ########
##################################


# One "<Domain>Feature" class per signal domain, each a subclass of Feature.
featureRoot = URIRef(local + 'Feature')
for domainLabel in domains.values():
    domainClass = URIRef(local + domainLabel.capitalize().replace(' ', '') + 'Feature')
    graph.add((domainClass, RDF.type, OWL.Class))
    graph.add((domainClass, RDFS.subClassOf, featureRoot))

featureBase = URIRef(local + 'Feature')
frequencyFeatureClass = URIRef(local + 'FrequencyFeature')

# PerceptualFeature and PhysicalFeature are subclasses of Feature.  Each has
# a frequency-domain counterpart that is a subclass of FrequencyFeature and
# is declared equivalent to it.
for semClassName, freqClassName in (
    ('PerceptualFeature', 'FrequencyDomainPerceptualFeature'),
    ('PhysicalFeature', 'FrequencyDomainPhysicalFeature'),
):
    semClass = URIRef(local + semClassName)
    freqClass = URIRef(local + freqClassName)

    graph.add((semClass, RDF.type, OWL.Class))
    graph.add((semClass, RDFS.subClassOf, featureBase))

    graph.add((freqClass, RDF.type, OWL.Class))
    graph.add((freqClass, RDFS.subClassOf, frequencyFeatureClass))
    graph.add((freqClass, OWL.equivalentClass, semClass))

# Classes declared without a superclass.
for plainClassName in ('ParametrizedDimensions', 'SemanticInterpretation'):
    graph.add((URIRef(local + plainClassName), RDF.type, OWL.Class))

### Computational Complexity
# A root class and three level subclasses.
complexityRoot = URIRef(local + 'ComputationalComplexity')
graph.add((complexityRoot, RDF.type, OWL.Class))
for levelName in ('LowComplexity', 'MediumComplexity', 'HighComplexity'):
    levelClass = URIRef(local + levelName)
    graph.add((levelClass, RDF.type, OWL.Class))
    graph.add((levelClass, RDFS.subClassOf, complexityRoot))


#### Temporal scale
# A root class and three scale subclasses.
temporalScaleRoot = URIRef(local + 'TemporalScale')
graph.add((temporalScaleRoot, RDF.type, OWL.Class))
for scaleName in ('IntraFrame', 'InterFrame', 'Global'):
    scaleClass = URIRef(local + scaleName)
    graph.add((scaleClass, RDF.type, OWL.Class))
    graph.add((scaleClass, RDFS.subClassOf, temporalScaleRoot))


### Application domain
# A root class plus one subclass per non-empty entry of `appdom`; the
# subclass name is the readable label with its spaces removed.
appDomainRoot = URIRef(local + 'ApplicationDomain')
graph.add((appDomainRoot, RDF.type, OWL.Class))

for appLabel in appdom.values():
    if appLabel == "":
        continue
    appClass = URIRef(local + appLabel.replace(" ", ""))
    graph.add((appClass, RDF.type, OWL.Class))
    graph.add((appClass, RDFS.subClassOf, appDomainRoot))



###############################
####### add properties ########
###############################

# Properties of AudioFeature, one row per property:
#   (local name, property kind, range, label, comment)
# Properties whose range is an XSD datatype are declared as datatype
# properties.  XSD datatype names are lower-case (xsd:boolean, xsd:integer).
# NOTE(review): `dimensions` is also given the ParametrizedDimensions
# resource as a value for variable-sized features -- confirm that a
# datatype property is the intended modelling.
audioFeatureClass = URIRef(local + "AudioFeature")

for propID, propType, propRange, propLabel, propComment in (
    ("application_domain", OWL.ObjectProperty,
     URIRef(local + 'ApplicationDomain'),
     "application_domain property",
     "application domain: mir, speech, environmental, etc."),
    ("semantic_interpretation", OWL.ObjectProperty,
     URIRef(local + 'SemanticInterpretation'),
     "semantic_interpretation property",
     "semantic interpretation: physical/perceptual"),
    ("computational_complexity", OWL.ObjectProperty,
     URIRef(local + 'ComputationalComplexity'),
     "computational_complexity property",
     "computational complexity: high/medium/low"),
    ("psychoacoustic_model", OWL.DatatypeProperty,
     XSD.boolean,
     "psychoacoustic_model property",
     "psychoacoustic model: true/false"),
    ("temporal_scale", OWL.ObjectProperty,
     URIRef(local + 'TemporalScale'),
     "temporal_scale",
     "temporal scale: global, interframe, intraframe"),
    ("dimensions", OWL.DatatypeProperty,
     XSD.integer,
     "dimensions property",
     "dimensions: an integer value"),
    ("computation", OWL.ObjectProperty,
     URIRef(local + 'Computation'),
     "computation",
     "computation: a sequence of mathematical operations"),
):
    addProperty(
        graph=graph,
        propID=propID,
        type=propType,
        range=propRange,
        domain=audioFeatureClass,
        label=propLabel,
        comment=propComment
    )


#############################
####### add features ########
#############################

# Tokens of a computation signature that group operations rather than name one.
signatureBrackets = ("(", ")", "[", "]")

# Single-letter codes of the category table mapped to ontology classes.
temporalScaleByCode = {
    'I': URIRef(local + 'IntraFrame'),
    'X': URIRef(local + 'InterFrame'),
    'G': URIRef(local + 'Global'),
}
complexityByCode = {
    'L': URIRef(local + 'LowComplexity'),
    'M': URIRef(local + 'MediumComplexity'),
    'H': URIRef(local + 'HighComplexity'),
}

# One pass per feature name.  Entry i of `cat` and of `sig` describes the
# feature at position i of `names`.
for i, name in enumerate(names):
    basename = name.replace(' ', '').replace('-', '').replace('4Hz', '')
    featureRef = URIRef(local + basename)

    # Category fields of THIS feature, space separated:
    #   0 temporal scale (I/X/G)      1 perceptual flag ('Y' = perceptual)
    #   2 psychoacoustic flag ('Y')   3 complexity (L/M/H)
    #   4 dimensions (integer or 'V') 5 application-domain key of `appdom`
    # They are parsed first because the class assignment below depends on
    # the perceptual flag of the current feature.
    word = cat[i].split(' ')
    isPerceptual = word[1] == 'Y'

    # A name listed in `order` switches the active signal domain.  The bounds
    # check keeps names that follow the last marker from indexing past the
    # end of `order`.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    graph.add((featureRef, RDF.type, URIRef(local + "AudioFeature")))
    graph.add((featureRef, VS['term_status'], Literal("testing")))

    # Frequency-domain features are filed under the perceptual or physical
    # frequency class; every other feature under its "<Domain>Feature" class.
    if domain == "frequency":
        if isPerceptual:
            parentClass = URIRef(local + 'FrequencyDomainPerceptualFeature')
        else:
            parentClass = URIRef(local + 'FrequencyDomainPhysicalFeature')
    else:
        parentClass = URIRef(local + domain.capitalize().replace(' ', '') + 'Feature')
    graph.add((featureRef, RDFS.subClassOf, parentClass))

    # Two labels are recorded: the compact form and the original name.
    graph.add((featureRef, RDFS.label, Literal(name.replace(' ', '').replace('-', ''))))
    graph.add((featureRef, RDFS.comment, Literal(name + " feature")))
    graph.add((featureRef, RDFS.label, Literal(name)))

    graph.add((
        featureRef,
        URIRef(local + 'temporal_scale'),
        temporalScaleByCode[word[0]]
    ))

    if isPerceptual:
        interpretation = URIRef(local + 'PerceptualFeature')
    else:
        interpretation = URIRef(local + 'PhysicalFeature')
    graph.add((
        featureRef,
        URIRef(local + "semantic_interpretation"),
        interpretation
    ))

    graph.add((
        featureRef,
        URIRef(local + "psychoacoustic_model"),
        Literal(word[2] == 'Y')
    ))

    graph.add((
        featureRef,
        URIRef(local + "computational_complexity"),
        complexityByCode[word[3]]
    ))

    # 'V' marks a parameter-dependent size; anything else is an integer.
    if word[4] == 'V':
        dimensionValue = URIRef(local + 'ParametrizedDimensions')
    else:
        dimensionValue = Literal(int(word[4]))
    graph.add((featureRef, URIRef(local + 'dimensions'), dimensionValue))

    # An empty application-domain name means there is nothing to link.
    appLabel = appdom[word[5]]
    if appLabel != '':
        graph.add((
            featureRef,
            URIRef(local + "application_domain"),
            URIRef(local + appLabel.replace(" ", ""))
        ))

    # Computation: the signature is a space-separated list of operation keys
    # (looked up in `compdict`) and bracket tokens.
    steps = sig[i].split(' ')
    compid = URIRef(local + basename + "_computation")
    graph.add((compid, RDF.type, URIRef(local + "Computation")))

    # Number of real operations, needed to recognise the last one.
    nSteps = sum(1 for item in steps if item not in signatureBrackets)

    iIndex = -1
    isOptional = False
    for item in steps:
        if item in signatureBrackets:
            # "(" flags the next operation as optional; the other bracket
            # tokens are skipped.
            if item == "(":
                isOptional = True
            continue

        iIndex += 1
        opid = URIRef(local + basename + '_operation_sequence_' + str(iIndex + 1))
        graph.add((opid, RDF.type, compdict[item]))

        if iIndex == 0:
            # The computation points at its first operation only.
            graph.add((compid, URIRef(local + 'operation_sequence'), opid))

        if iIndex < nSteps - 1:
            graph.add((
                opid,
                URIRef(local + "next_operation"),
                URIRef(local + basename + '_operation_sequence_' + str(iIndex + 2))
            ))
        else:
            graph.add((opid, RDF.type, URIRef(local + 'LastOperation')))

        if isOptional:
            graph.add((opid, RDF.type, URIRef(local + 'OptionalOperation')))
            # Only the first operation after "(" is flagged.
            isOptional = False

    graph.add((featureRef, URIRef(local + 'computation'), compid))

    if 'MPEG-7' in name:
        graph.add((
            featureRef,
            URIRef(local + 'computedIn'),
            Literal('MPEG-7')
        ))

    if name in abbr:
        graph.add((
            featureRef,
            URIRef(local + 'abbreviation'),
            Literal(abbr[name])
        ))



# Write the ontology twice: as Notation3 and as RDF/XML.  The RDF/XML format
# is named explicitly so that the .rdf file does not depend on the library's
# default serialisation format.
graph.serialize('/Users/alo/MusicOntology/features/rdfonto/baseOntoV2.n3', format='n3')
graph.serialize('/Users/alo/MusicOntology/features/rdfonto/baseOntoV2.rdf', format='xml')