Mercurial > hg > audio-features-catalogue
diff pdfextract/writeBase.py @ 1:365a37a2fb6c
added files from pdfextract directory
author | nothing@tehis.net |
---|---|
date | Mon, 25 Feb 2013 14:47:41 +0000 |
parents | |
children | 8bd8453e0551 |
line wrap: on
line diff
# Reconstructed from the Mercurial diff that adds pdfextract/writeBase.py
# (--- /dev/null  +++ b/pdfextract/writeBase.py,
#  Mon Feb 25 14:47:41 2013 +0000, hunk @@ -0,0 +1,190 @@).
"""Build the base RDF description of the audio-features catalogue.

Reads three parallel, line-aligned text files from ``pdfextract/``:

* ``names.txt``      -- one feature name per line,
* ``categories.txt`` -- six space-separated codes per feature,
* ``sig.txt``        -- space-separated computation-step keys per feature,

plus ``filters.txt``, ``trans.txt`` and ``aggr.txt``, which map a
one-character key (first character of each line) to a description (the
text from the third character onwards).

For every feature an ``owl:Class`` resource is added to ``graph`` together
with its properties, and the graph is finally serialized to a fixed path.
"""

import fnmatch
import os
import urllib2
from xml.dom.minidom import parseString

import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL


def _read_lines(path):
    """Return the lines of *path* stripped of surrounding whitespace.

    The file is closed before returning (the handle is not leaked).
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


names = _read_lines('pdfextract/names.txt')
cat = _read_lines('pdfextract/categories.txt')
sig = _read_lines('pdfextract/sig.txt')

local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

graph = Graph()
graph.bind('local', URIRef(local))
graph.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))
graph.bind('owl', URIRef('http://www.w3.org/2002/07/owl#'))

# Names that open a new domain section, in the order they occur in names.txt.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features",
]

# Domain assigned to a section-opening name and to every name after it,
# until the next section-opening name is reached.
domains = {
    "Zero Crossing Rate": 'temporal',
    "Linear Predictive Coding": 'frequency',
    "Mel-scale Frequency Cepstral Coefficients": 'cepstral',
    "Auditory Filter Bank Temporal Envelopes": 'modulation frequency',
    "Rate-scale-frequency Features": 'eigendomain',
    "Phase Space Features": 'phase space',
}

abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero crossing peak amplitudes": "ZCPA",
    "Line spectral frequencies": "LSF",
    "Short-time energy": "STE",
    "Amplitude descriptor": "AD",
    "Adaptive time frequency transform": "ATFT",
    "Daubechies Wavelet coefficient histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group delay function": "GDF",
    "Modified group delay function": "MGDF",
    "Spectral centroid": "SC",
    "Subband spectral flux": "SSF",
    "Perceptual linear prediction": "PLP",
}

# Decoding tables for the columns of categories.txt.  They are constant, so
# they are built once here instead of on every loop iteration.
TEMPORAL_SCALES = {
    'I': 'intraframe',
    'X': 'interframe',
    'G': 'global',
}

COMPLEXITIES = {
    'L': 'low',
    'M': 'medium',
    'H': 'high',
}

APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': '',  # empty string: no 'appdomain' triple is emitted
}

domain = ""
domainIndex = 0

# One-character computation key -> description.
compdict = {}
for filename in ['filters', 'trans', 'aggr']:
    for line in _read_lines('pdfextract/' + filename + '.txt'):
        if not line:
            # A blank line has no key character; skip it rather than
            # crash on line[0].
            continue
        compdict[line[0]] = line[2:]


def _add(subject, prop, value):
    """Add the triple (subject, local:<prop>, Literal(value)) to ``graph``."""
    graph.add((subject, URIRef(local + prop), Literal(value)))


for i, name in enumerate(names):
    # The resource identifier is the name without spaces and hyphens.
    short_name = name.replace(' ', '').replace('-', '')
    subject = URIRef(local + short_name)

    # Move on to the next domain when its opening name is reached.  The
    # bounds check keeps the last domain in effect for any names that follow
    # the final entry of ``order`` instead of raising IndexError.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[name]
        domainIndex += 1

    graph.add((subject, RDF.type, OWL.Class))
    _add(subject, 'feature', short_name)
    _add(subject, 'domain', domain)

    # categories.txt columns: temporal scale, perceptual flag,
    # psychoacoustic-model flag, complexity, dimensions, application domain.
    word = cat[i].split(' ')

    _add(subject, 'temporalscale', TEMPORAL_SCALES[word[0]])
    _add(subject, 'level', 'perceptual' if word[1] == 'Y' else 'physical')

    if word[2] == 'Y':
        _add(subject, 'model', 'psychoacoustic')

    _add(subject, 'complexity', COMPLEXITIES[word[3]])

    # 'V' marks a parameterized dimensionality; any other token is stored
    # verbatim.
    _add(subject, 'dimensions',
         'parameterized' if word[4] == 'V' else word[4])

    appdomain = APP_DOMAINS[word[5]]
    if appdomain != '':
        _add(subject, 'appdomain', appdomain)

    # One 'computation' triple per step key listed in sig.txt.
    for key in sig[i].split(' '):
        _add(subject, 'computation', compdict[key])

    if 'MPEG-7' in name:
        _add(subject, 'computedIn', 'MPEG-7')

    if name in abbr:
        _add(subject, 'abbreviation', abbr[name])


graph.serialize('/Users/alo/MusicOntology/features/rdf/base.rdf')