view pdfextract/graphDefs.py @ 4:8bd8453e0551

added fuxi facts and rules
author nothing@tehis.net
date Tue, 02 Apr 2013 15:33:18 +0100
parents 365a37a2fb6c
children d5012016bf64
line wrap: on
line source
import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL

# Canonical audio-feature name -> its usual abbreviation.
# Consulted by checkAbbreviations() to map an abbreviation back to the
# canonical name.
abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero Crossing Peak Amplitudes": "ZCPA",
    "Line Spectral Frequencies": "LSF",
    "Short-Time Energy": "STE",
    "Amplitude Descriptor": "AD",
    "Adaptive Time Frequency Transform": "ATFT",
    "Daubechies Wavelet Coefficient Histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group Delay Function": "GDF",
    "Modified Group Delay Function": "MGDF",
    "Spectral Centroid": "SC",
    "Subband Spectral Flux": "SSF",
    "Perceptual Linear Prediction": "PLP",
    "Linear Spectral Pairs": "LSP",
    "Average Magnitude Difference Function": "AMDF",
    "Octave Band Signal Intensity": "OBSI",
    "Root Mean Square": "RMS",
    "Harmonic Pitch Class Profile": "HPCP",
}

synonyms = {
    "Mel-scale Frequency Cepstral Coefficients": ["Mel Frequency Cepstral Coefficients", "Mel-Frequency Cepstral Coefficients", "Coefficients", "Mfcc"],
    "Spectral Kurtosis": ["Kurtosis", "Spectral kurtosis"],
    "Spectral Rolloff": ["Rolloff", "Spectral Rolloff Point"],
    "Zero Crossing Rate": ["Zero Crossing", "Zcr", "Zero Crossings"],
    "Spectral Skewness": ["Skewness", "Spectral skewness"],
    "Spectral Flux": ["Flux"],
    "Spectral Centroid": ["Centroid", "Spectral centroid"],
    "Spectral Slope": ["Spectral slope"],
    "Spectral Flatness": ["Spectral Flatness Measure", "Flatness"],
    "Harmonic Spectrum": ["Harmonic spectrum"],
    "Average Magnitude Difference Function": ["Amdf"],
    "AutoCorrelation": ["Autocorrelation"],
    "PeakSpectrum": ["Peak spectrum"],
    "Spectral Spread": ["Spread"],
    "Spectral Crest": ["Spectral Crest Measure"],
    "Onset Detection Function": ["Onset", "Onsets"],
    "Root Mean Square": ["Rms"]
}

# Run StringMatcher.py inside this module's namespace.
# NOTE(review): presumably this is what defines the StringMatcher class used
# by compareForSimilarities() below (nothing else in this file imports or
# defines it) -- confirm.
# NOTE(review): execfile() is a Python 2 builtin and the absolute path is
# specific to one developer's machine; importing StringMatcher from the
# installed package would be more portable -- confirm which package layout
# is in use before changing this.
execfile('/Users/alo/Development/python-Levenshtein-0.10.2/StringMatcher.py')

def checkSynonyms( name, table=None ):
    """Return the canonical identifier for ``name`` if it is a known synonym.

    ``name`` is compared with every synonym in ``table`` after removing
    spaces and hyphens and lower-casing both sides.  On the first match the
    corresponding key is returned with its spaces and hyphens removed; an
    empty string is returned when nothing matches.

    name  -- the label to look up
    table -- optional mapping of canonical name -> list of synonyms;
             defaults to the module-level ``synonyms`` dictionary
    """
    if table is None:
        table = synonyms

    # Normalise the query once rather than once per candidate.
    wanted = name.replace(' ', '').replace('-', '').lower()

    for key, syns in table.items():
        for item in syns:
            if wanted == item.replace(' ', '').replace('-', '').lower():
                # Return straight away: a plain ``break`` would only leave
                # the inner loop and the scan over the remaining keys would
                # carry on (and could overwrite the result).
                return key.replace(' ', '').replace('-', '')
    return ""
    
def checkAbbreviations( name, table=None ):
    """Return the canonical identifier for ``name`` if it is a known abbreviation.

    ``name`` is compared with every abbreviation in ``table`` after removing
    spaces and hyphens and lower-casing both sides.  On the first match the
    corresponding key is returned with its spaces and hyphens removed; an
    empty string is returned when nothing matches.

    name  -- the label to look up
    table -- optional mapping of canonical name -> abbreviation; defaults
             to the module-level ``abbr`` dictionary
    """
    if table is None:
        table = abbr

    # Normalise the query once rather than once per candidate.
    wanted = name.replace(' ', '').replace('-', '').lower()

    for key, ab in table.items():
        if wanted == ab.replace(' ', '').replace('-', '').lower():
            return key.replace(' ', '').replace('-', '')
    return ""
    

def loadBase( graph, path, ns=None ):
    """Parse ``path`` into ``graph`` and file its classes under AudioFeature.

    After parsing, every subject that has at least one triple whose object
    is ``OWL.Class`` receives an ``RDFS.subClassOf <ns>AudioFeature`` triple.

    graph -- graph to load into; modified in place
    path  -- source passed straight to ``graph.parse``
    ns    -- namespace prefix used to build the AudioFeature URI.  When
             omitted, the module-level global ``ns`` is used, which is what
             this function implicitly relied on before the parameter
             existed.

    Raises NameError if ``ns`` is not given, no global ``ns`` exists and
    there is at least one class to link.
    """
    graph.parse(path)

    # Take a snapshot so the graph is not modified while its own query
    # generator is still being consumed.
    classes = [su for su, _ in graph.subject_predicates(OWL.Class)]
    if not classes:
        return

    if ns is None:
        # Legacy fallback: look for a global ``ns`` in this module's namespace.
        ns = globals().get('ns')
        if ns is None:
            raise NameError("loadBase: no 'ns' argument given and no global 'ns' defined")

    parent = URIRef(ns + 'AudioFeature')
    for su in classes:
        graph.add((su, RDFS.subClassOf, parent))

def addBaseTriples( graph, ns ):
    """Seed ``graph`` with the base vocabulary in namespace ``ns``.

    Adds three triples: ``Signal`` and ``Feature`` are declared as
    ``OWL.Class``, and ``AudioFeature`` is declared a subclass of ``Signal``.
    """
    base_triples = (
        (URIRef(ns + 'Signal'), RDF.type, OWL.Class),
        (URIRef(ns + 'Feature'), RDF.type, OWL.Class),
        (URIRef(ns + 'AudioFeature'), RDFS.subClassOf, URIRef(ns + 'Signal')),
    )
    for triple in base_triples:
        graph.add(triple)

    
def addTriplesFromFile( graph, path, ns ):
    """Import the resources described in the file at ``path`` into ``graph``.

    The file is parsed into a temporary graph.  For every subject typed
    ``RDFS.Resource`` there, an identifier is derived from the last
    '/'-separated segment of its URI: the synonym table is tried first, then
    the abbreviation table, and otherwise the segment itself is used with
    spaces and hyphens removed.  ``<ns><identifier>`` is then added to
    ``graph`` as an ``OWL.Class`` and a subclass of ``<ns>AudioFeature``,
    together with all of the subject's predicate/object pairs except those
    whose object is ``RDFS.Resource``.  Finally a ``<ns>computedIn`` literal
    is attached, holding the file name from ``path`` with its first three
    and last four characters removed.
    """
    source = Graph()
    source.parse(path)

    for resource in source.subjects(RDF.type, RDFS.Resource):
        label = resource.split('/')[-1]

        # Both lookups return "" when they find nothing, so ``or`` falls
        # through to the next strategy.
        ident = (
            checkSynonyms(label)
            or checkAbbreviations(label)
            or label.replace(' ','').replace('-','')
        )
        feature = URIRef(ns + ident)

        graph.add((feature, RDF.type, OWL.Class))
        graph.add((feature, RDFS.subClassOf, URIRef(ns+'AudioFeature')))

        # Copy the resource's own statements, minus its Resource typing.
        for pr, ob in source.predicate_objects(resource):
            if ob != RDFS.Resource:
                graph.add((feature, pr, ob))

        file_tag = path.split('/')[-1][3:-4]
        graph.add((feature, URIRef(ns+'computedIn'), Literal(file_tag)))


def compareForSimilarities( graph, ns, threshold=0.75 ):
    """Link classes in ``graph`` whose local names are nearly the same.

    Every subject that has a triple with object ``OWL.Class`` is compared
    with every other such subject.  The local name is the last
    '/'-separated segment of the URI.  For each ordered pair the edit
    distance reported by ``StringMatcher`` is divided by the mean length of
    the two names; when that ratio is below ``1 - threshold`` the triple
    ``(s, <ns>similarTo, ss)`` is added.  Both directions are evaluated, so
    a similar pair is linked both ways.  The score itself is not stored.

    graph     -- graph to inspect; modified in place
    ns        -- namespace prefix for the ``similarTo`` predicate
    threshold -- similarity required for a link, between 0 and 1
    """
    # Snapshot the subjects and their local names once.  This avoids adding
    # triples while the graph's own generators are being consumed, and
    # avoids re-querying the graph for every outer item.
    named = [(s, s.split('/')[-1]) for s, _ in graph.subject_predicates(OWL.Class)]

    similar_to = URIRef(ns + 'similarTo')
    limit = 1 - threshold

    for s, it in named:
        for ss, other in named:
            if s == ss:
                continue

            mean_len = (len(it) + len(other)) / 2.0
            if mean_len == 0:
                # Both local names are empty (URIs ending in '/'): there is
                # nothing to compare, and dividing would raise
                # ZeroDivisionError.
                continue

            m = StringMatcher()
            m.set_seqs(it, other)
            score = float(m.distance()) / mean_len
            if score < limit:
                graph.add((s, similar_to, ss))