view pdfextract/parseMarsyas.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children
line wrap: on
line source
import rdflib, os, fnmatch
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal

# Execute StringMatcher.py from a python-Levenshtein source checkout in this
# namespace; the matching loop further down instantiates StringMatcher by
# bare name, so it is presumably defined by that file.
# NOTE(review): absolute, machine-specific path -- adjust for your checkout.
execfile('/Users/alo/Downloads/python-Levenshtein-0.10.2/StringMatcher.py')

# Directory containing the Marsyas headers (*.h) that get scanned.
# NOTE(review): absolute, machine-specific path as well.
mdir = '/Users/alo/Development/MIR/marsyas-0.4.7/src/marsyas/'
#magraph = Graph()

# Results, keyed by Marsyas class name; each value is a dict that the
# matching loop fills with 'brief', 'score' and 'name' entries.
madict = {}

# `current` is the RDF graph queried with current.triples(...) by the matching
# loop.  Its loading code was commented out, so a standalone run failed with a
# NameError as soon as an Analysis header was found.  Load it here unless a
# graph named `current` is already present in this namespace (presumably the
# case when the script is execfile'd from an interactive session that has
# parsed the graph already -- TODO confirm).
if 'current' not in globals():
    current = Graph()
    current.parse('docfeatures.rdf')

# Full feature names mapped to their abbreviations.  The matching loop uses
# this table as a fallback: when a class name scores below 0.75 and equals one
# of these abbreviations, the full name is recorded with a score of 1.0.
abbr = dict([
    ("Zero Crossing Rate", "ZCR"),
    ("Mel-Frequency Cepstral Coefficients", "MFCC"),
    ("Linear Predictive Coding", "LPC"),
    ("Zero crossing peak amplitudes", "ZCPA"),
    ("Line spectral frequencies", "LSF"),
    ("Short-time energy", "STE"),
    ("Amplitude descriptor", "AD"),
    ("Adaptive time frequency transform", "ATFT"),
    ("Daubechies Wavelet coefficient histogram", "DWCH"),
    ("Spectral Flux", "SF"),
    ("Group delay function", "GDF"),
    ("Modified group delay function", "MGDF"),
    ("Spectral centroid", "SC"),
    ("Subband spectral flux", "SSF"),
    ("Perceptual linear prediction", "PLP"),
])

# Scan every header in `mdir`.  For each header tagged "\ingroup Analysis",
# record its "\class" name and brief text in `madict`, then look for the
# closest feature name in the `current` RDF graph, falling back to the `abbr`
# table when no candidate is close enough.

# Predicate whose objects are the candidate feature names; built once instead
# of on every iteration.
feature_predicate = URIRef('http://sovarr.c4dm.eecs.qmul.ac.uk/features/feature')

for name in os.listdir(mdir):
    if not fnmatch.fnmatch(name, '*.h'):
        continue

    # The context manager closes the header even if reading fails.
    with open(os.path.join(mdir, name)) as header:
        code = [line.strip() for line in header]

    # Only headers belonging to the Analysis group are of interest.
    if not any(r'\ingroup Analysis' in line for line in code):
        continue

    # Locate the first "\class" line: the class name is its last
    # space-separated token.
    cl = ''
    for i, line in enumerate(code):
        if r'\class' not in line:
            continue
        cl = line.split(' ')[-1]
        # The brief text is read two lines below the "\class" line with its
        # first 7 characters dropped (presumably the "\brief " tag), plus
        # the following line when that one is not empty.  Lines missing at
        # the end of the file are treated as empty instead of raising
        # IndexError.
        brief = code[i + 2][7:] if i + 2 < len(code) else ''
        if i + 3 < len(code) and code[i + 3] != '':
            brief += code[i + 3]
        madict[cl] = {'brief': brief}
        break

    if not cl:
        # Tagged as Analysis but no "\class" line: nothing to record.
        # (Previously this fell through and raised KeyError on madict[''].)
        continue

    # `score` tracks the smallest normalised distance seen so far; 100 is a
    # "nothing matched yet" sentinel.  madict[cl]['score'] stores the
    # corresponding similarity (1.0 - distance).
    # NOTE(review): when the graph yields no candidate at all, the stored
    # score stays at 100 and the abbreviation fallback below is skipped --
    # confirm that this is intended.
    score = 100
    madict[cl]['score'] = 100
    madict[cl]['name'] = ""
    for s, p, o in current.triples((None, None, RDFS.Resource)):
        # A separate name is used for the candidate so that the outer loop
        # variable `name` (the header file name) is not overwritten.
        for cs, cp, feature_name in current.triples((s, feature_predicate, None)):
            m = StringMatcher()
            m.set_seqs(Literal(cl), feature_name)
            # Distance reported by the matcher, normalised by the mean
            # length of the two strings.
            sc = float(m.distance()) / ((len(cl) + len(feature_name)) / 2.0)
            if sc < score:
                madict[cl]['score'] = 1.0 - sc
                madict[cl]['name'] = feature_name
                score = sc

    # Weak match: if the class name is itself a known abbreviation, use the
    # corresponding full feature name with a perfect score.
    if madict[cl]['score'] < 0.75:
        for full_name, short_name in abbr.items():
            if short_name == cl:
                madict[cl]['score'] = 1.0
                madict[cl]['name'] = full_name