annotate pdfextract/graphDefs.py @ 4:8bd8453e0551

added fuxi facts and rules
author nothing@tehis.net
date Tue, 02 Apr 2013 15:33:18 +0100
parents 365a37a2fb6c
children d5012016bf64
rev   line source
nothing@1 1 import rdflib
nothing@1 2 from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
nothing@1 3
# Canonical audio-feature name -> its usual abbreviation.
# Consulted by checkAbbreviations() to map an abbreviation found in a
# document back to the canonical feature name.
abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero Crossing Peak Amplitudes": "ZCPA",
    "Line Spectral Frequencies": "LSF",
    "Short-Time Energy": "STE",
    "Amplitude Descriptor": "AD",
    "Adaptive Time Frequency Transform": "ATFT",
    "Daubechies Wavelet Coefficient Histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group Delay Function": "GDF",
    "Modified Group Delay Function": "MGDF",
    "Spectral Centroid": "SC",
    "Subband Spectral Flux": "SSF",
    "Perceptual Linear Prediction": "PLP",
    "Linear Spectral Pairs": "LSP",
    "Average Magnitude Difference Function": "AMDF",
    "Octave Band Signal Intensity": "OBSI",
    "Root Mean Square": "RMS",
    "Harmonic Pitch Class Profile": "HPCP",
}
nothing@1 27
# Canonical audio-feature name -> alternative spellings / short forms.
# Consulted by checkSynonyms(), which compares names with spaces and
# hyphens removed and case ignored.
synonyms = {
    "Mel-scale Frequency Cepstral Coefficients": [
        "Mel Frequency Cepstral Coefficients",
        "Mel-Frequency Cepstral Coefficients",
        "Coefficients",
        "Mfcc",
    ],
    "Spectral Kurtosis": ["Kurtosis", "Spectral kurtosis"],
    "Spectral Rolloff": ["Rolloff", "Spectral Rolloff Point"],
    "Zero Crossing Rate": ["Zero Crossing", "Zcr", "Zero Crossings"],
    "Spectral Skewness": ["Skewness", "Spectral skewness"],
    "Spectral Flux": ["Flux"],
    "Spectral Centroid": ["Centroid", "Spectral centroid"],
    "Spectral Slope": ["Spectral slope"],
    "Spectral Flatness": ["Spectral Flatness Measure", "Flatness"],
    "Harmonic Spectrum": ["Harmonic spectrum"],
    "Average Magnitude Difference Function": ["Amdf"],
    "AutoCorrelation": ["Autocorrelation"],
    "PeakSpectrum": ["Peak spectrum"],
    "Spectral Spread": ["Spread"],
    "Spectral Crest": ["Spectral Crest Measure"],
    "Onset Detection Function": ["Onset", "Onsets"],
    "Root Mean Square": ["Rms"],
}
nothing@1 47
# Bring the Levenshtein-based `StringMatcher` class into this module's
# namespace (used by compareForSimilarities).
#
# Prefer the copy shipped inside the installed Levenshtein package; only
# fall back to the original developer-local source file when the package
# does not provide it.
try:
    from Levenshtein.StringMatcher import StringMatcher
except ImportError:
    # NOTE(review): this executes code from a hard-coded absolute path on one
    # developer's machine; it will fail elsewhere and runs whatever that file
    # contains. Kept only for backward compatibility with the original setup.
    _STRING_MATCHER_PATH = '/Users/alo/Development/python-Levenshtein-0.10.2/StringMatcher.py'
    with open(_STRING_MATCHER_PATH) as _string_matcher_src:
        # Equivalent of the Python-2-only execfile(), written so that it also
        # works on Python 3; definitions land in this module's globals.
        exec(compile(_string_matcher_src.read(), _STRING_MATCHER_PATH, 'exec'))
nothing@1 49
def checkSynonyms(name):
    """Resolve *name* to a canonical feature identifier via the synonym table.

    Comparison ignores case, spaces and hyphens. Returns the canonical key
    from the module-level ``synonyms`` table with spaces and hyphens removed,
    or an empty string when no synonym matches.
    """
    wanted = name.replace(' ', '').replace('-', '').lower()
    resolved = ""
    for canonical, alternatives in synonyms.items():
        # Every key is examined; if several keys matched, the one visited
        # last would win.
        if any(alt.replace(' ', '').replace('-', '').lower() == wanted
               for alt in alternatives):
            resolved = canonical.replace(' ', '').replace('-', '')
    return resolved
nothing@1 58
def checkAbbreviations(name):
    """Resolve *name* to a canonical feature identifier via the abbreviation table.

    Comparison ignores case, spaces and hyphens. Returns the canonical key
    from the module-level ``abbr`` table with spaces and hyphens removed for
    the first abbreviation that matches, or an empty string when none does.
    """
    wanted = name.replace(' ', '').replace('-', '').lower()
    for canonical, short in abbr.items():
        if short.replace(' ', '').replace('-', '').lower() == wanted:
            return canonical.replace(' ', '').replace('-', '')
    return ""
nothing@1 66
nothing@1 67
def loadBase(graph, path, ns=None):
    """Parse *path* into *graph* and put every OWL class under ``AudioFeature``.

    After parsing, each subject that has ``owl:Class`` as the object of any
    triple gets an ``rdfs:subClassOf <ns>AudioFeature`` triple added.

    Parameters:
        graph: graph object providing ``parse``, ``subject_predicates`` and
            ``add``.
        path:  source handed to ``graph.parse``.
        ns:    namespace prefix prepended to ``'AudioFeature'``. Optional for
            backward compatibility: the original code read an undeclared
            global called ``ns``; when this argument is omitted that global
            is still looked up.

    Raises:
        NameError: if classes were found, ``ns`` was not passed and no global
            ``ns`` exists (the same failure the original code produced).
    """
    graph.parse(path)

    # Snapshot the subjects first so the graph is not modified while a query
    # generator over it is still being consumed.
    class_subjects = [su for su, pr in graph.subject_predicates(OWL.Class)]
    if not class_subjects:
        return

    if ns is None:
        ns = globals().get('ns')
        if ns is None:
            raise NameError("global name 'ns' is not defined")

    parent = URIRef(ns + 'AudioFeature')
    for su in class_subjects:
        graph.add((su, RDFS.subClassOf, parent))
nothing@1 72
def addBaseTriples(graph, ns):
    """Add the fixed top-level triples of the ontology to *graph*.

    Declares ``<ns>Signal`` and ``<ns>Feature`` as OWL classes and states
    that ``<ns>AudioFeature`` is a subclass of ``<ns>Signal``.
    """
    # NOTE(review): AudioFeature is attached to Signal, not Feature; this
    # mirrors the existing behaviour -- confirm it is intended.
    signal = URIRef(ns + 'Signal')
    base_triples = (
        (signal, RDF.type, OWL.Class),
        (URIRef(ns + 'Feature'), RDF.type, OWL.Class),
        (URIRef(ns + 'AudioFeature'), RDFS.subClassOf, signal),
    )
    for triple in base_triples:
        graph.add(triple)
nothing@1 91
nothing@1 92
def addTriplesFromFile(graph, path, ns):
    """Copy the resources described in the file at *path* into *graph*.

    The file is parsed into a temporary graph. For every subject typed
    ``rdfs:Resource`` there, the last ``/``-separated segment of its URI is
    resolved to an identifier (synonym table first, then abbreviation table,
    otherwise the segment itself with spaces and hyphens removed). The
    resulting ``<ns><identifier>`` node is then:

    * declared an OWL class,
    * made a subclass of ``<ns>AudioFeature``,
    * given all of the subject's predicate/object pairs except those whose
      object is ``rdfs:Resource``,
    * tagged with ``<ns>computedIn`` pointing at a literal derived from the
      file name.
    """
    loc = Graph()
    loc.parse(path)

    # File name without its first 3 and last 4 characters
    # (presumably a fixed prefix and the extension -- verify against callers).
    origin = Literal(path.split('/')[-1][3:-4])

    for su in loc.subjects(RDF.type, RDFS.Resource):
        name = su.split('/')[-1]

        # First non-empty answer wins: synonym, abbreviation, raw name.
        ids = (checkSynonyms(name)
               or checkAbbreviations(name)
               or name.replace(' ','').replace('-',''))

        feature = URIRef(ns + ids)
        graph.add((feature, RDF.type, OWL.Class))
        graph.add((feature, RDFS.subClassOf, URIRef(ns+'AudioFeature')))

        for pr, ob in loc.predicate_objects(su):
            if ob != RDFS.Resource:
                graph.add((feature, pr, ob))

        graph.add((feature, URIRef(ns+'computedIn'), origin))
nothing@1 125
nothing@1 126
def compareForSimilarities(graph, ns, threshold=0.75):
    """Link OWL classes whose local names are similar with ``<ns>similarTo``.

    For every ordered pair of distinct subjects that have ``owl:Class`` as
    the object of a triple, the edit distance between the last
    ``/``-separated segments of their URIs is divided by the mean length of
    the two segments. When that normalised distance is below
    ``1 - threshold`` a ``(s, <ns>similarTo, ss)`` triple is added.

    Parameters:
        graph:     graph providing ``subject_predicates`` and ``add``.
        ns:        namespace prefix for the ``similarTo`` predicate.
        threshold: minimum similarity (1 - normalised distance) required;
            defaults to 0.75.
    """
    similar_to = URIRef(ns + 'similarTo')

    # Take one snapshot of the classes and their local names: the graph is
    # modified below, and the names do not change between iterations.
    classes = [(s, s.split('/')[-1])
               for s, p in graph.subject_predicates(OWL.Class)]

    cutoff = 1 - threshold
    m = StringMatcher()
    for s, it in classes:
        for ss, other in classes:
            if s == ss:
                continue
            mean_length = (len(it) + len(other)) / 2.0
            if mean_length == 0:
                # Both local names are empty; there is nothing to compare and
                # the normalisation would divide by zero.
                continue
            m.set_seqs(it, other)
            score = float(m.distance()) / mean_length
            if score < cutoff:
                graph.add((s, similar_to, ss))
nothing@1 139