view pdfextract/writeBase.py @ 4:8bd8453e0551

added fuxi facts and rules
author nothing@tehis.net
date Tue, 02 Apr 2013 15:33:18 +0100
parents 365a37a2fb6c
children
line wrap: on
line source
import fnmatch
import os
import urllib2
from xml.dom.minidom import parseString

import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL, Namespace

def _read_stripped_lines(path):
    """Return every line of the text file at `path`, stripped of surrounding whitespace.

    The file is opened in a `with` block so the handle is closed as soon as
    the lines have been read.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


# Parallel input lists: row i of `cat` and row i of `sig` describe the
# feature named in row i of `names` (the main loop indexes all three with
# the same counter).
names = _read_stripped_lines('pdfextract/names.txt')
cat = _read_stripped_lines('pdfextract/categories.txt')
sig = _read_stripped_lines('pdfextract/sig.txt')

# The RDF graph that the rest of the script populates and finally serializes.
# `Namespace` must be imported from rdflib for this section to run.
graph = Graph()

af = Namespace('http://sovarr.c4dm.eecs.qmul.ac.uk/features/')
dc = Namespace('http://purl.org/dc/elements/1.1/')
owl = Namespace('http://www.w3.org/2002/07/owl#')

# Register the prefixes on the graph, in the same order as before.
for prefix, namespace in (('af', af), ('dc', dc), ('owl', owl)):
    graph.bind(prefix, namespace)

# Row index into `cat` and `sig` for the feature currently being processed.
i = 0

# (feature name, domain) pairs. The main loop switches to a pair's domain
# when it meets that feature name in `names`, and it only ever looks for the
# next pair in this sequence, so the order here matters.
_domain_starts = [
    ("Zero Crossing Rate", 'temporal'),
    ("Linear Predictive Coding", 'frequency'),
    ("Mel-scale Frequency Cepstral Coefficients", 'cepstral'),
    ("Auditory Filter Bank Temporal Envelopes", 'modulation frequency'),
    ("Rate-scale-frequency Features", 'eigendomain'),
    ("Phase Space Features", 'phase space'),
]

# Feature names only, in the sequence above.
order = [pair[0] for pair in _domain_starts]

# Feature name -> domain label.
domains = dict(_domain_starts)

# (full feature name, abbreviation) pairs. Keys are matched against the
# entries of `names` exactly, so the spelling and capitalisation matter.
_abbreviation_pairs = (
    ("Zero Crossing Rate", "ZCR"),
    ("Mel-scale Frequency Cepstral Coefficients", "MFCC"),
    ("Linear Predictive Coding", "LPC"),
    ("Linear Prediction Cepstral Coefficients", "LPCC"),
    ("Zero crossing peak amplitudes", "ZCPA"),
    ("Line spectral frequencies", "LSF"),
    ("Short-time energy", "STE"),
    ("Amplitude descriptor", "AD"),
    ("Adaptive time frequency transform", "ATFT"),
    ("Daubechies Wavelet coefficient histogram", "DWCH"),
    ("Spectral Flux", "SF"),
    ("Group delay function", "GDF"),
    ("Modified group delay function", "MGDF"),
    ("Spectral centroid", "SC"),
    ("Subband spectral flux", "SSF"),
    ("Perceptual linear prediction", "PLP"),
)

# Feature name -> abbreviation lookup used by the main loop.
abbr = dict(_abbreviation_pairs)


# Running state for the main loop: the domain label currently in effect and
# the position in `order` of the next domain-starting feature to look for.
domain = ""
domainIndex = 0

# Maps a one-character code (first character of a line) to the text that
# follows it (everything from the third character on). The main loop looks
# up the tokens of each `sig` row in this table.
compdict = {}

for filename in ['filters', 'trans', 'aggr']:
    # `with` closes each component file once its lines have been indexed.
    with open('pdfextract/' + filename + '.txt') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # A blank line has no code character; indexing line[0]
                # would raise IndexError, so skip it.
                continue
            compdict[line[0]] = line[2:]
        

            
# Base IRI under which every feature resource and property is minted.
# NOTE(review): `local` was referenced throughout this loop but never
# assigned anywhere in the script, so the first iteration raised NameError.
# It is assumed to be the same base IRI as the `af` namespace -- confirm.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# Code tables for the space-separated columns of a `cat` row. They are built
# once here instead of on every iteration. An unknown code still raises
# KeyError, as before.
TEMPORAL_SCALES = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global',
}

COMPLEXITIES = {
    'L': 'low',
    'M': 'medium',
    'H': 'high',
}

# An empty description means no 'appdomain' triple is written for that code.
APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': '',
}


def _add_literal(subject, prop, value):
    """Add the triple (subject, <local + prop>, Literal(value)) to `graph`."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


# One OWL class per feature name. Row i of `cat` and `sig` belongs to
# names[i]; a shorter `cat` or `sig` raises IndexError, as before.
for i, name in enumerate(names):
    feature_name = name.replace(' ', '').replace('-', '')
    subject = URIRef(local + feature_name)

    # Switch domain when the next domain-starting feature is reached. The
    # bounds check keeps the loop alive for names that follow the last
    # entry of `order`; without it order[domainIndex] raised IndexError.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    graph.add((subject, RDF.type, OWL.Class))
    _add_literal(subject, 'feature', feature_name)
    _add_literal(subject, 'domain', domain)

    # Columns of the category row:
    # 0 temporal scale, 1 perceptual flag, 2 psychoacoustic-model flag,
    # 3 complexity, 4 dimensions ('V' or an integer), 5 application domain.
    word = cat[i].split(' ')

    _add_literal(subject, 'temporalscale', TEMPORAL_SCALES[word[0]])
    _add_literal(subject, 'level',
                 'perceptual' if word[1] == 'Y' else 'physical')

    if word[2] == 'Y':
        _add_literal(subject, 'model', 'psychoacoustic')

    _add_literal(subject, 'complexity', COMPLEXITIES[word[3]])

    if word[4] == 'V':
        dimensions = 'parameterized'
        dimensionality = 'multi-dimensional'
    else:
        # The column value is stored as written; int() only decides the
        # dimensionality label and raises ValueError if it is not a number.
        dimensions = word[4]
        if int(dimensions) == 1:
            dimensionality = 'one-dimensional'
        else:
            dimensionality = 'multi-dimensional'

    _add_literal(subject, 'dimensions', dimensions)
    _add_literal(subject, 'dimensionality', dimensionality)

    app_domain = APP_DOMAINS[word[5]]
    if app_domain != '':
        _add_literal(subject, 'appdomain', app_domain)

    # Every token of the signature row is a key of `compdict`.
    for key in sig[i].split(' '):
        _add_literal(subject, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add_literal(subject, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add_literal(subject, 'abbreviation', abbr[name])


# Write the finished graph to disk. The destination is a fixed absolute path
# and no format argument is passed, so rdflib's default is used.
output_path = '/Users/alo/MusicOntology/features/rdf/base.rdf'
graph.serialize(output_path)