Mercurial > hg > audio-features-catalogue
view pdfextract/makeAFGraph.py @ 18:d5012016bf64 tip
added rdfpy and rdfonto directories
author | nothing@tehis.net |
---|---|
date | Tue, 23 Apr 2013 11:49:20 +0100 |
parents | 365a37a2fb6c |
children |
line wrap: on
line source
# Build an RDF catalogue of audio features from the text tables under
# pdfextract/ and annotate every feature with the toolkits that compute it
# (Vamp, Marsyas, jMIR, libXtract, yaafe, MIRToolbox, CLAM, SuperCollider,
# aubio, sMIRk).  The result is serialised as N3 and as rdflib's default
# format.
#
# NOTE: this is a Python 2 script (it relies on `urllib2` and `execfile`) and
# all toolkit locations are hard-coded absolute paths.

import fnmatch
import os
import urllib2

import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal
from xml.dom.minidom import parseString


def read_lines(path):
    """Return the lines of the file at *path*, stripped of surrounding whitespace.

    The file is closed before returning (the handle is not left to the
    garbage collector).
    """
    with open(path) as handle:
        return [line.strip() for line in handle]


names = read_lines('pdfextract/names.txt')
cat = read_lines('pdfextract/categories.txt')
sig = read_lines('pdfextract/sig.txt')

local = 'http://sovarr.c4dm.eecs.qmul.ac.uk/features/'

# Terms reused for every feature written to the catalogue.
RDF_TYPE = URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
RESOURCE = URIRef(u'http://www.w3.org/2000/01/rdf-schema#Resource')
FEATURE = URIRef(local + 'feature')
COMPUTED_IN = URIRef(local + 'computedIn')

afgr = Graph()
afgr.bind('local', URIRef(local))
afgr.bind('dc', URIRef('http://purl.org/dc/elements/1.1/'))

# Alias used by the matching helpers; candidates are matched against whatever
# the catalogue contains at the time of the call.
current = afgr


def split_uppercase(string):
    """Insert a space before every upper-case character of *string*.

    Leading/trailing whitespace of the result is stripped, e.g.
    ``'SpectralFlux'`` becomes ``'Spectral Flux'``.
    """
    return ''.join(' ' + ch if ch.isupper() else ch for ch in string).strip()


def feature_uri(label):
    """Return the catalogue URI for *label* (spaces and hyphens removed)."""
    return URIRef(local + label.replace(' ', '').replace('-', ''))


def catalogue_labels():
    """Return the `feature` literals of every resource currently in the catalogue."""
    return [label
            for subject, _, _ in current.triples((None, None, RDFS.Resource))
            for _, _, label in current.triples((subject, FEATURE, None))]


def match_feature(candidate, labels, abbreviation=None):
    """Find the catalogue label closest to *candidate*.

    The distance reported by ``StringMatcher`` is normalised by the mean
    length of the two strings; the stored score is ``1.0 - distance``.

    candidate    -- string to look up (callers wrap plain strings in Literal so
                    both sequences handed to the matcher have the same type)
    labels       -- catalogue labels to compare against
    abbreviation -- optional short form; when the best score is below 0.75
                    and this equals a value of `abbr`, the match is forced to
                    the corresponding full name with score 1.0

    Returns a dict with keys 'score' and 'name' ('score' 0 and 'name' ""
    when *labels* is empty).
    """
    match = {'score': 0, 'name': ""}
    lowest = 100
    for label in labels:
        matcher = StringMatcher()
        matcher.set_seqs(candidate, label)
        distance = float(matcher.distance()) / ((len(candidate) + len(label)) / 2.0)
        # Strict comparison: the first label reaching the minimum is kept.
        if distance < lowest:
            match['score'] = 1.0 - distance
            match['name'] = label
            lowest = distance
    if abbreviation is not None and match['score'] < 0.75:
        for full_name, short in abbr.items():
            if short == abbreviation:
                match['score'] = 1.0
                match['name'] = full_name
    return match


def record_matches(matches, tool, threshold=0.75, split_label=False, source=None):
    """Write the outcome of a toolkit's matching pass into the catalogue.

    matches     -- dict mapping toolkit feature name to its match dict
    tool        -- literal stored as the `computedIn` value
    threshold   -- a match scoring above this is attached to the existing
                   catalogue feature; anything else becomes a new resource
    split_label -- store the new feature's label through split_uppercase()
    source      -- optional graph; for new resources every triple of the
                   match's 'sub' subject whose object is not rdfs:Resource
                   is copied onto the new catalogue URI

    A match dict carrying a 'brief' key additionally yields a `description`
    triple on the newly created resource.
    """
    for key, match in matches.items():
        if match['score'] > threshold:
            afgr.add((feature_uri(match['name']), COMPUTED_IN, Literal(tool)))
            continue
        subject = feature_uri(key)
        afgr.add((subject, RDF_TYPE, RESOURCE))
        label = split_uppercase(key) if split_label else key
        afgr.add((subject, FEATURE, Literal(label)))
        if 'brief' in match:
            afgr.add((subject, URIRef(local + 'description'), Literal(match['brief'])))
        afgr.add((subject, COMPUTED_IN, Literal(tool)))
        if source is not None:
            for _, predicate, obj in source.triples((match['sub'], None, None)):
                if obj != RDFS.Resource:
                    afgr.add((subject, predicate, obj))


def marsyas_class(code):
    """Return ``(class_name, brief)`` from the first ``\\class`` line of *code*.

    The brief text is taken from the line two below the ``\\class`` line with
    its first seven characters dropped, plus the following line when that is
    not empty.  Lines beyond the end of the file are treated as empty.
    Returns None when *code* has no ``\\class`` line.
    """
    for index, line in enumerate(code):
        if '\\class' in line:
            class_name = line.split(' ')[-1]
            brief = code[index + 2][7:] if index + 2 < len(code) else ''
            if index + 3 < len(code) and code[index + 3] != '':
                brief += code[index + 3]
            return class_name, brief
    return None


def link_rdf_toolkit(path, tool):
    """Match the rdfs:Resource subjects of the RDF file at *path* against the catalogue.

    The last path segment of each subject URI is used as the feature name.
    Returns ``(graph, matches)``; each match dict also carries the original
    subject under 'sub'.
    """
    graph = Graph()
    graph.parse(path)
    labels = catalogue_labels()
    matches = {}
    for subject, _, _ in graph.triples((None, None, RDFS.Resource)):
        short = subject.split('/')[-1]
        match = match_feature(Literal(short), labels, abbreviation=short)
        match['sub'] = subject
        matches[short] = match
    record_matches(matches, tool, threshold=0.77, split_label=True, source=graph)
    return graph, matches


# The first feature of each domain, in the order the domains appear in
# names.txt; reaching one of these names switches the current domain.
order = [
    "Zero Crossing Rate",
    "Linear Predictive Coding",
    "Mel-scale Frequency Cepstral Coefficients",
    "Auditory Filter Bank Temporal Envelopes",
    "Rate-scale-frequency Features",
    "Phase Space Features",
]

domains = {
    "Zero Crossing Rate": 'temporal',
    "Linear Predictive Coding": 'frequency',
    "Mel-scale Frequency Cepstral Coefficients": 'cepstral',
    "Auditory Filter Bank Temporal Envelopes": 'modulation frequency',
    "Rate-scale-frequency Features": 'eigendomain',
    "Phase Space Features": 'phase space',
}

abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero crossing peak amplitudes": "ZCPA",
    "Line spectral frequencies": "LSF",
    "Short-time energy": "STE",
    "Amplitude descriptor": "AD",
    "Adaptive time frequency transform": "ATFT",
    "Daubechies Wavelet coefficient histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group delay function": "GDF",
    "Modified group delay function": "MGDF",
    "Spectral centroid": "SC",
    "Subband spectral flux": "SSF",
    "Perceptual linear prediction": "PLP",
}

# Decoding tables for the space-separated columns of categories.txt.
TEMPORAL_SCALES = {'I': 'intraframe', 'X': 'interframe', 'G': 'global'}
COMPLEXITIES = {'L': 'low', 'M': 'medium', 'H': 'high'}
APP_DOMAINS = {
    'ASR': "speech recognition",
    'ESR': "environmental sound recognition",
    'MIR': "music information retrieval",
    'AS': "audio segmentation",
    'FP': "fingerprinting",
    'VAR': "several",
    'EXC': '',
}

domain = ""
domainIndex = 0

# Each line of the step tables is "<key char><separator><description>".
compdict = {}
for filename in ['filters', 'trans', 'aggr']:
    for line in read_lines('pdfextract/' + filename + '.txt'):
        if line:  # a blank line has no key character
            compdict[line[0]] = line[2:]

# names.txt, categories.txt and sig.txt are parallel: entry i of each file
# describes the same feature.
for i, name in enumerate(names):
    subject = feature_uri(name)

    # Guarded so features listed after the last domain marker keep that
    # domain instead of indexing past the end of `order`.
    if domainIndex < len(order) and name == order[domainIndex]:
        domain = domains[order[domainIndex]]
        domainIndex += 1

    afgr.add((subject, RDF_TYPE, RESOURCE))
    afgr.add((subject, FEATURE, Literal(name)))
    afgr.add((subject, URIRef(local + 'domain'), Literal(domain)))

    word = cat[i].split(' ')

    afgr.add((subject, URIRef(local + 'temporalscale'), Literal(TEMPORAL_SCALES[word[0]])))

    level = 'perceptual' if word[1] == 'Y' else 'physical'
    afgr.add((subject, URIRef(local + 'level'), Literal(level)))

    if word[2] == 'Y':
        afgr.add((subject, URIRef(local + 'model'), Literal('psychoacoustic')))

    afgr.add((subject, URIRef(local + 'complexity'), Literal(COMPLEXITIES[word[3]])))

    dimensions = 'parameterized' if word[4] == 'V' else word[4]
    afgr.add((subject, URIRef(local + 'dimensions'), Literal(dimensions)))

    appdomain = APP_DOMAINS[word[5]]
    if appdomain != '':
        afgr.add((subject, URIRef(local + 'appdomain'), Literal(appdomain)))

    for key in sig[i].split(' '):
        afgr.add((subject, URIRef(local + 'computation'), Literal(compdict[key])))

    if name.find('MPEG-7') >= 0:
        afgr.add((subject, COMPUTED_IN, Literal('MPEG-7')))

    if name in abbr:
        afgr.add((subject, URIRef(local + 'abbreviation'), Literal(abbr[name])))

# Brings `StringMatcher` into module scope.
# NOTE(review): execfile runs arbitrary code from a hard-coded download
# directory; consider importing StringMatcher from an installed package.
execfile('/Users/alo/Downloads/python-Levenshtein-0.10.2/StringMatcher.py')

############# Vamp ###############

vamp = Graph()
vamp.parse('/Users/alo/Development/qm/qm-vamp-plugins/qm-vamp-plugins.n3', format='n3')

vampdict = {}
labels = catalogue_labels()
for plugin_node, _, _ in vamp.triples((None, None, URIRef('http://purl.org/ontology/vamp/Plugin'))):
    for _, _, vo in vamp.triples((plugin_node, URIRef('http://purl.org/ontology/vamp/name'), None)):
        # Vamp names are already rdflib terms and get no abbreviation fallback.
        vampdict[vo] = match_feature(vo, labels)
record_matches(vampdict, 'Vamp Plugins')

############# Marsyas ###############

mdir = '/Users/alo/Development/MIR/marsyas-0.4.7/src/marsyas/'
madict = {}
labels = catalogue_labels()
for header in os.listdir(mdir):
    if not fnmatch.fnmatch(header, '*.h'):
        continue
    code = read_lines(mdir + header)
    if not any('\\ingroup Analysis' in line for line in code):
        continue
    described = marsyas_class(code)
    if described is None:
        # Tagged as analysis code but declares no \class: nothing to record.
        continue
    cl, brief = described
    entry = match_feature(Literal(cl), labels, abbreviation=cl)
    entry['brief'] = brief
    madict[cl] = entry
record_matches(madict, 'Marsyas')

############# jMIR ###############

jdir = '/Users/alo/Development/MIR/jAudio/jAudio/'
with open(jdir + 'features.xml', 'rb') as handle:
    dom = parseString(handle.read())

jdict = {}
labels = catalogue_labels()
for node in dom.getElementsByTagName('feature'):
    # The second child node holds a dotted name; keep its last component.
    jname = node.childNodes[1].firstChild.nodeValue.split('.')[-1]
    jdict[jname] = match_feature(Literal(jname), labels, abbreviation=jname)
record_matches(jdict, 'jMIR')

############# libXtract ###############

path = '/Users/alo/Development/MIR/LibXtract/xtract/libxtract.h'
lines = read_lines(path)
# NOTE(review): the slice stops before the line preceding 'XTRACT_WINDOWED',
# so that line is skipped as well -- confirm this is intended.
xtract = lines[(lines.index('enum xtract_features_ {') + 1):(lines.index('XTRACT_WINDOWED') - 1)]

xdict = {}
labels = catalogue_labels()
for ln in xtract:
    # Drop everything up to the first underscore and the final character,
    # then turn the remaining underscores into spaces.
    xname = ln[(ln.index('_') + 1):-1].replace("_", " ").lower()
    xdict[xname] = match_feature(Literal(xname), labels, abbreviation=xname.upper())
record_matches(xdict, 'libXtract')

############# yaafe ###############

path = "/Users/alo/Development/MIR/yaafe-v0.64/src_python/yaafefeatures.py"
lines = read_lines(path)

ydict = {}
labels = catalogue_labels()
for ln in lines:
    if ln.find('class ') >= 0:
        yname = ln[6:ln.find('(AudioFeature)')]
        ydict[yname] = match_feature(Literal(yname), labels, abbreviation=yname)
record_matches(ydict, 'yaafe')

############# MIRToolbox ###############

mirt, mdict = link_rdf_toolkit("/Users/alo/MusicOntology/features/rdf/af-MIRToolbox.rdf", 'MIRToolbox')

############# CLAM ###############

clam, cdict = link_rdf_toolkit("/Users/alo/MusicOntology/features/rdf/af-CLAM.rdf", 'CLAM')

############# SCMIR ###############

scg, scdict = link_rdf_toolkit("/Users/alo/MusicOntology/features/rdf/af-SuperCollider.rdf", 'SuperCollider')

############# aubio ###############

aug, audict = link_rdf_toolkit("/Users/alo/MusicOntology/features/rdf/af-aubio.rdf", 'Aubio')

############# sMIRk ###############

smg, smdict = link_rdf_toolkit("/Users/alo/MusicOntology/features/rdf/af-smirk.rdf", 'sMIRk')

afgr.serialize('/Users/alo/MusicOntology/features/docfeatures.n3', format='n3')
afgr.serialize('/Users/alo/MusicOntology/features/docfeatures.rdf')