view pdfextract/graphDefs.py @ 18:d5012016bf64 tip

added rdfpy and rdfonto directories
author nothing@tehis.net
date Tue, 23 Apr 2013 11:49:20 +0100
parents 8bd8453e0551
children
line wrap: on
line source
import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL

# Canonical feature name -> abbreviation.
# checkAbbreviations() compares an incoming name against the *values* and
# returns the matching *key* with spaces and hyphens removed, so the keys
# determine the generated class names.
abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero Crossing Peak Amplitudes": "ZCPA",
    "Line Spectral Frequencies": "LSF",
    "Short-Time Energy": "STE",
    "Amplitude Descriptor": "AD",
    "Adaptive Time Frequency Transform": "ATFT",
    "Daubechies Wavelet Coefficient Histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group Delay Function": "GDF",
    "Modified Group Delay Function": "MGDF",
    "Spectral Centroid": "SC",
    "Subband Spectral Flux": "SSF",
    "Perceptual Linear Prediction": "PLP",
    "Linear Spectral Pairs": "LSP",
    "Average Magnitude Difference Function": "AMDF",
    "Octave Band Signal Intensity": "OBSI",
    "Root Mean Square": "RMS",
    "Harmonic Pitch Class Profile": "HPCP",
    "SignalToNoiseRatio": "SNR",
}

# Canonical feature name -> alternative spellings seen in the input data.
# checkSynonyms() compares names with spaces and hyphens removed and case
# ignored, and returns the matching *key* (spaces and hyphens removed), so the
# keys determine the generated class names.
synonyms = {
    "Mel-scale Frequency Cepstral Coefficients": [
        "Mel Frequency Cepstral Coefficients",
        "Mel-Frequency Cepstral Coefficients",
        "Coefficients",
        "Mfcc",
    ],
    "Spectral Kurtosis": ["Kurtosis", "Spectral kurtosis"],
    "Spectral Rolloff": ["Rolloff", "Spectral Rolloff Point"],
    "Zero Crossing Rate": ["Zero Crossing", "Zcr", "Zero Crossings"],
    "Spectral Skewness": ["Skewness", "Spectral skewness"],
    "Spectral Flux": ["Flux"],
    "Spectral Centroid": ["Centroid", "Spectral centroid"],
    "Spectral Slope": ["Spectral slope"],
    "Spectral Flatness": ["Spectral Flatness Measure", "Flatness"],
    "Harmonic Spectrum": ["Harmonic spectrum"],
    "Average Magnitude Difference Function": ["Amdf"],
    "Average Squared Difference Function": ["Asdf"],
    "AutoCorrelation": ["Autocorrelation"],
    "PeakSpectrum": ["Peak spectrum"],
    "Spectral Spread": ["Spread"],
    "Spectral Crest": ["Spectral Crest Measure"],
    "Onset Detection Function": ["Onset", "Onsets", "Onset Detector"],
    "Root Mean Square": ["Rms"],
    "Note Tracker": ["Aubio Note Tracker"],
    "Pitch": ["Aubio Pitch Detector"],
    "Silence Test": ["Aubio Silence Detector"],
    "AutoCorrelationFFT": ["Autocorrelationfft"],
    "Average Deviation": ["Average deviation"],
    "Bark Coefficients": ["Bark coefficients"],
    "Beat Spectrum": ["Beat Spectra"],
    # "BeatTracking" and "BeatTrack" are separate entries: without the comma
    # between them Python joins adjacent string literals into one.
    "Beat Tracker": [
        "Beat Tracking",
        "BeatTracking",
        "BeatTrack",
        "Beat Track",
        "BeatTrack2",
        "Beats",
        "Aubio Beat Tracker",
    ],
    "Complex Domain Onset Detection": ["Complex Domain Method Onset Detection Function"],
    "Discrete Cosine Transform": ["Dct"],
    # NOTE(review): "Funcdamental" looks like a typo for "Fundamental"; left
    # unchanged because the key becomes the generated class name.
    "Funcdamental Frequency": ["F0", "Failsafef0"],
    "HighestValue": ["Highest value"],
    "Harmonic Product Spectrum": ["Hps"],
    "Key Detector": ["KeyTrack"],
    "KrumhanslKeyFinder": ["Krumhansl_key_finder"],
    "L-Norm": ["Lnorm"],
    "Lowest Value": ["Lowest value"],
    "MELODIAMelodyExtraction": ["MELODIAMelodyExtractionintermediatesteps"],
    "MIDI Note": ["MIDI"],
    "Note Onset Detector": ["Note Onsets", "Note Onset"],
    "Octave Band Signal Intensity Ratio": ["OBSIR"],
    "OddToEvenHarmonicRatio": ["Oddtoevenharmonicratio"],
    "OddEvenRatio": ["Oddevenratio"],
    # NOTE(review): "Countours" looks like a typo for "Contours"; left
    # unchanged because the key becomes the generated class name.
    "Pitch Countours": ["PitchContours:All", "PitchContours:Melody"],
    "PitchFFTYIN": ["PitchFftYin"],
    "PitchYIN": ["PitchYin", "Yin", "AubioYin"],
    "Rhythm Patterns": ["RhythmPattern"],
    "RMSAmplitude": ["Rmsamplitude"],
    "Spectral Shape": ["Spectral Shape Descriptors"],
    "Spectral Variance": ["Spectral Variation", "Spectralvariance"],
    "Spectral Average Deviation": ["Spectralaveragedeviation"],
    "Spectral Inharmonicity": ["Spectralinharmonicity"],
    "Spectral Mean": ["Spectralmean"],
    "Spectral Standard Deviation": ["Spectralstandarddeviation"],
    "Standard Deviation": ["Standarddeviation"],
    "Tristimulus1": ["TristimulusI"],
    "Tristimulus2": ["TristimulusII"],
    "Tristimulus3": ["TristimulusIII"],
    "Even Harmonic Ratio": ["evenHarmonicRatio"],
}

# Python 2 only: execfile() runs StringMatcher.py inside this module's
# namespace.
# NOTE(review): presumably this is what defines the StringMatcher class used
# by compareForSimilarities() below -- confirm. The absolute path is specific
# to one machine, so the module cannot be loaded anywhere else as it stands.
execfile('/Users/alo/Development/python-Levenshtein-0.10.2/StringMatcher.py')

def checkSynonyms( name, table=None ):
    """Return the canonical identifier for `name` if it is a known synonym.

    Names are compared with spaces and hyphens removed and case ignored.

    name  -- feature name to look up.
    table -- optional mapping of canonical name -> list of synonyms;
             defaults to the module-level `synonyms` table.

    Returns the matching canonical name with spaces and hyphens removed, or
    "" when nothing matches. The first matching entry wins; scanning stops
    there instead of letting a later entry overwrite the result.
    """
    if table is None:
        table = synonyms
    # Normalize the query once rather than once per synonym.
    wanted = name.replace(' ', '').replace('-', '').lower()
    for key, syns in table.items():
        for item in syns:
            if item.replace(' ', '').replace('-', '').lower() == wanted:
                return key.replace(' ', '').replace('-', '')
    return ""
    
def checkAbbreviations( name, table=None ):
    """Return the canonical identifier for `name` if it is a known abbreviation.

    Names are compared with spaces and hyphens removed and case ignored.

    name  -- feature name (or abbreviation) to look up.
    table -- optional mapping of canonical name -> abbreviation;
             defaults to the module-level `abbr` table.

    Returns the matching canonical name with spaces and hyphens removed, or
    "" when nothing matches.
    """
    if table is None:
        table = abbr
    # Normalize the query once rather than once per table entry.
    wanted = name.replace(' ', '').replace('-', '').lower()
    for key, ab in table.items():
        if ab.replace(' ', '').replace('-', '').lower() == wanted:
            return key.replace(' ', '').replace('-', '')
    return ""
    

def loadBase( graph, path, ns=None ):
    """Parse `path` into `graph` and put its classes under ns+'AudioFeature'.

    Every subject that has some triple whose object is OWL.Class gets an
    additional (subject, RDFS.subClassOf, ns+'AudioFeature') triple.

    graph -- rdflib Graph to load into and extend.
    path  -- source handed to graph.parse().
    ns    -- namespace prefix for the 'AudioFeature' URI. Optional for
             backward compatibility: when omitted, a module-level `ns` is
             used, which is what the function silently relied on before.

    Raises NameError when `ns` is neither passed nor defined at module level.
    """
    if ns is None:
        try:
            ns = globals()['ns']
        except KeyError:
            raise NameError(
                "loadBase() needs a namespace: pass ns or define a module-level 'ns'")

    graph.parse(path)
    # Snapshot the subjects first: the loop adds triples to the graph that the
    # query iterates over. Using a set also drops repeated subjects.
    subjects = set(su for su, pr in graph.subject_predicates(OWL.Class))
    audio_feature = URIRef(ns+'AudioFeature')
    for su in subjects:
        graph.add((su, RDFS.subClassOf, audio_feature))

def addBaseTriples( graph, ns ):
    """Add the three base triples of the feature ontology to `graph`.

    Declares ns+'Signal' and ns+'Feature' as OWL classes and states that
    ns+'AudioFeature' is a subclass of ns+'Signal'.

    graph -- rdflib Graph to add to.
    ns    -- namespace prefix prepended to the local names.
    """
    signal = URIRef(ns+'Signal')
    base_triples = (
        (signal, RDF.type, OWL.Class),
        (URIRef(ns+'Feature'), RDF.type, OWL.Class),
        # NOTE(review): AudioFeature hangs off Signal, not the Feature class
        # declared just above -- confirm this is intended.
        (URIRef(ns+'AudioFeature'), RDFS.subClassOf, signal),
    )
    for triple in base_triples:
        graph.add(triple)

    
def addTriplesFromFile( graph, path, ns ):
    """Copy the resources described in the file at `path` into `graph`.

    The file is parsed into a scratch graph. For each subject typed as
    RDFS.Resource, the last '/'-separated segment of its URI is mapped to a
    canonical identifier: via checkSynonyms(), then checkAbbreviations(), and
    otherwise the segment itself with spaces and hyphens removed. The
    resulting ns+identifier node is declared an OWL class and a subclass of
    ns+'AudioFeature', receives every (predicate, object) pair of the source
    subject whose object is not RDFS.Resource, and is tagged with a
    ns+'computedIn' literal derived from the file name.

    graph -- rdflib Graph that receives the triples.
    path  -- '/'-separated path of the file to parse.
    ns    -- namespace prefix for the generated URIs.
    """
    source = Graph()
    source.parse(path)

    for subject in source.subjects(RDF.type, RDFS.Resource):
        local_name = subject.split('/')[-1]

        # The first non-empty result wins; both lookups return "" on a miss.
        ids = (
            checkSynonyms(local_name)
            or checkAbbreviations(local_name)
            or local_name.replace(' ','').replace('-','')
        )
        feature = URIRef(ns + ids)

        graph.add((feature, RDF.type, OWL.Class))
        graph.add((feature, RDFS.subClassOf, URIRef(ns+'AudioFeature')))

        for predicate, value in source.predicate_objects(subject):
            if value == RDFS.Resource:
                continue
            graph.add((feature, predicate, value))

        # File name minus its first three and last four characters
        # (presumably a fixed prefix and the extension -- confirm).
        origin = path.split('/')[-1][3:-4]
        graph.add((feature, URIRef(ns+'computedIn'), Literal(origin)))


def compareForSimilarities( graph, ns, threshold=0.75 ):
    """Link classes whose local names are nearly identical.

    Considers every subject that has some triple whose object is OWL.Class
    and compares the last '/'-separated segment of each pair of distinct
    subjects. The score is the StringMatcher distance divided by the mean
    length of the two names; when it is below (1 - threshold) a
    (s, ns+'similarTo', ss) triple is added. Both directions of a pair are
    visited, so the link is added each way. The score itself is not stored.

    graph     -- rdflib Graph to inspect and extend.
    ns        -- namespace prefix for the 'similarTo' predicate.
    threshold -- similarity cut-off; larger values require closer names.
    """
    # Snapshot subjects and their local names once: the loops below add
    # triples to the very graph being queried, and the names do not change.
    subjects = set(s for s, p in graph.subject_predicates(OWL.Class))
    named = [(s, s.split('/')[-1]) for s in subjects]

    similar_to = URIRef(ns + 'similarTo')
    cutoff = 1 - threshold

    for s, it in named:
        for ss, other in named:
            if s == ss:
                continue
            mean_len = (len(it) + len(other)) / 2.0
            if mean_len == 0:
                # Both URIs end in '/': there is no local name to compare,
                # and dividing by the mean length would fail.
                continue
            m = StringMatcher()
            m.set_seqs(it, other)
            score = float(m.distance()) / mean_len
            if score < cutoff:
                graph.add((s, similar_to, ss))