Mercurial > hg > audio-features-catalogue

import rdflib, os, fnmatch, urllib2
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
from xml.dom.minidom import parseString

def _read_stripped_lines(path):
    """Return every line of the text file at *path*, whitespace-stripped.

    The file is opened in a ``with`` block so the handle is closed as soon
    as the lines have been read, instead of being left to the garbage
    collector.
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


# Three line-aligned inputs: entry N of each list comes from line N of its
# file. The main loop walks ``names`` and reads ``cat`` and ``sig`` at the
# same position.
names = _read_stripped_lines('pdfextract/names.txt')
cat = _read_stripped_lines('pdfextract/categories.txt')
sig = _read_stripped_lines('pdfextract/sig.txt')

# Graph that collects every triple added by this script.
graph = Graph()

# ``Namespace`` is not among the names pulled in by the
# ``from rdflib import ...`` line at the top of the file, so a bare
# ``Namespace(...)`` call raises NameError. Reach it through the ``rdflib``
# module, which is imported.
af = rdflib.Namespace('http://sovarr.c4dm.eecs.qmul.ac.uk/features/')
graph.bind('af', af)

dc = rdflib.Namespace('http://purl.org/dc/elements/1.1/')
graph.bind('dc', dc)

owl = rdflib.Namespace('http://www.w3.org/2002/07/owl#')
graph.bind('owl', owl)

# Row counter into the line-aligned ``cat`` and ``sig`` lists.
i = 0

# Feature names that open a new domain, in the order they are expected to
# appear in ``names``. The main loop compares each name against
# ``order[domainIndex]`` and moves to the next domain on a match.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features",
]

# Domain label for each entry of ``order``, paired by position.
domains = dict(zip(order, (
    'temporal',
    'frequency',
    'cepstral',
    'modulation frequency',
    'eigendomain',
    'phase space',
)))

# Abbreviation for each feature name that has one. The main loop looks up
# the exact feature name here, so the keys are case-sensitive.
abbr = dict([
    ("Zero Crossing Rate", "ZCR"),
    ("Mel-scale Frequency Cepstral Coefficients", "MFCC"),
    ("Linear Predictive Coding", "LPC"),
    ("Linear Prediction Cepstral Coefficients", "LPCC"),
    ("Zero crossing peak amplitudes", "ZCPA"),
    ("Line spectral frequencies", "LSF"),
    ("Short-time energy", "STE"),
    ("Amplitude descriptor", "AD"),
    ("Adaptive time frequency transform", "ATFT"),
    ("Daubechies Wavelet coefficient histogram", "DWCH"),
    ("Spectral Flux", "SF"),
    ("Group delay function", "GDF"),
    ("Modified group delay function", "MGDF"),
    ("Spectral centroid", "SC"),
    ("Subband spectral flux", "SSF"),
    ("Perceptual linear prediction", "PLP"),
])


# State carried across iterations of the main loop: the label of the domain
# currently in effect and the index of the next entry of ``order`` to match.
domain = ""
domainIndex = 0

# Maps a one-character step code to its description, merged from the three
# component files. A code that appears more than once keeps the last value
# read.
compdict = {}

for filename in ['filters', 'trans', 'aggr']:
    # ``with`` closes each file as soon as it has been consumed.
    with open('pdfextract/' + filename + '.txt') as handle:
        for line in handle:
            line = line.strip()
            if not line:
                # A blank line has no code character; indexing it would
                # raise IndexError.
                continue
            # The first character is the code, the second character is
            # skipped, and the remainder is the description.
            compdict[line[0]] = line[2:]


# ``local`` is the base URI for every subject and property below, but the
# script never assigned it, so the first use raised NameError. It is set here
# to the URI of the ``af`` namespace bound on the graph.
# NOTE(review): confirm this is the intended base URI.
local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# Decoding tables for the space-separated codes on each line of ``cat``.
# An unknown code raises KeyError, as it did when these were inline literals.
_TEMPORAL_SCALE = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global',
}

_COMPLEXITY = {
    'L': 'low',
    'M': 'medium',
    'H': 'high',
}

# 'EXC' maps to the empty string, which suppresses the appdomain triple.
_APPLICATION = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': '',
}


def _describe(subject, prop, value):
    """Add the triple ``(subject, local + prop, Literal(value))`` to ``graph``."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


# One pass per feature name. ``cat`` and ``sig`` are read at the same position
# as the name, so a shorter ``cat`` or ``sig`` still raises IndexError rather
# than being silently truncated. The module-level counter ``i`` is no longer
# advanced here.
for index, name in enumerate(names):
    # Feature identifier: the name with spaces and hyphens removed.
    compact = name.replace(' ', '').replace('-', '')
    feature = URIRef(local + compact)

    # Move to the next domain when this name is the next marker in ``order``.
    # The bounds check matters once the last marker has matched: without it
    # the following name would index past the end of ``order``.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    # Same triple as before; RDF.type and OWL.Class are already URIRefs.
    graph.add((feature, RDF.type, OWL.Class))

    _describe(feature, 'feature', compact)
    _describe(feature, 'domain', domain)

    # Category codes by position: temporal scale, perceptual flag,
    # psychoacoustic-model flag, complexity, dimensions, application domain.
    codes = cat[index].split(' ')

    _describe(feature, 'temporalscale', _TEMPORAL_SCALE[codes[0]])
    _describe(feature, 'level',
              'perceptual' if codes[1] == 'Y' else 'physical')

    if codes[2] == 'Y':
        _describe(feature, 'model', 'psychoacoustic')

    _describe(feature, 'complexity', _COMPLEXITY[codes[3]])

    # 'V' marks a parameterized dimension count; any other value must parse
    # as an integer (ValueError otherwise).
    if codes[4] == 'V':
        dimensions = 'parameterized'
        dimensionality = 'multi-dimensional'
    else:
        dimensions = codes[4]
        if int(dimensions) == 1:
            dimensionality = 'one-dimensional'
        else:
            dimensionality = 'multi-dimensional'

    _describe(feature, 'dimensions', dimensions)
    _describe(feature, 'dimensionality', dimensionality)

    application = _APPLICATION[codes[5]]
    if application != '':
        _describe(feature, 'appdomain', application)

    # One computation triple per step code on this feature's ``sig`` line.
    for step in sig[index].split(' '):
        _describe(feature, 'computation', compdict[step])

    if 'MPEG-7' in name:
        _describe(feature, 'computedIn', 'MPEG-7')

    if name in abbr:
        _describe(feature, 'abbreviation', abbr[name])


# Write the finished graph to disk. The format is named explicitly so the
# output is RDF/XML, matching the ``.rdf`` extension, regardless of the
# library's default serialization format.
graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf', format='xml')
author	nothing@tehis.net
date	Tue, 02 Apr 2013 15:33:18 +0100
parents	365a37a2fb6c
children