diff pdfextract/graphDefs.py @ 1:365a37a2fb6c

added files from pdfextract directory
author nothing@tehis.net
date Mon, 25 Feb 2013 14:47:41 +0000
parents
children 8bd8453e0551
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pdfextract/graphDefs.py	Mon Feb 25 14:47:41 2013 +0000
@@ -0,0 +1,139 @@
+import rdflib
+from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL
+
+# Canonical (full) feature name -> its customary abbreviation.
+# checkAbbreviations() looks a candidate name up among the *values* of this
+# table and answers with the matching key.
+abbr = {
+    "Zero Crossing Rate":                        "ZCR",
+    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
+    "Linear Predictive Coding":                  "LPC",
+    "Linear Prediction Cepstral Coefficients":   "LPCC",
+    "Zero Crossing Peak Amplitudes":             "ZCPA",
+    "Line Spectral Frequencies":                 "LSF",
+    "Short-Time Energy":                         "STE",
+    "Amplitude Descriptor":                      "AD",
+    "Adaptive Time Frequency Transform":         "ATFT",
+    "Daubechies Wavelet Coefficient Histogram":  "DWCH",
+    "Spectral Flux":                             "SF",
+    "Group Delay Function":                      "GDF",
+    "Modified Group Delay Function":             "MGDF",
+    "Spectral Centroid":                         "SC",
+    "Subband Spectral Flux":                     "SSF",
+    "Perceptual Linear Prediction":              "PLP",
+    "Linear Spectral Pairs":                     "LSP",
+    "Average Magnitude Difference Function":     "AMDF",
+    "Octave Band Signal Intensity":              "OBSI",
+    "Root Mean Square":                          "RMS",
+    "Harmonic Pitch Class Profile":              "HPCP",
+}
+
+# Canonical feature name -> alternative spellings under which the same
+# feature is known.  checkSynonyms() compares a candidate name with these
+# alternatives (case-sensitively, ignoring spaces and hyphens).
+synonyms = {
+    "Mel-scale Frequency Cepstral Coefficients": [
+        "Mel Frequency Cepstral Coefficients",
+        "Mel-Frequency Cepstral Coefficients",
+        "Coefficients",
+        "Mfcc",
+    ],
+    "Spectral Kurtosis": ["Kurtosis", "Spectral kurtosis"],
+    "Spectral Rolloff": ["Rolloff", "Spectral Rolloff Point"],
+    "Zero Crossing Rate": ["Zero Crossing", "Zcr", "Zero Crossings"],
+    "Spectral Skewness": ["Skewness", "Spectral skewness"],
+    "Spectral Flux": ["Flux"],
+    "Spectral Centroid": ["Centroid", "Spectral centroid"],
+    "Spectral Slope": ["Spectral slope"],
+    "Spectral Flatness": ["Spectral Flatness Measure", "Flatness"],
+    "Harmonic Spectrum": ["Harmonic spectrum"],
+    "Average Magnitude Difference Function": ["Amdf"],
+    "AutoCorrelation": ["Autocorrelation"],
+    "PeakSpectrum": ["Peak spectrum"],
+    "Spectral Spread": ["Spread"],
+    "Spectral Crest": ["Spectral Crest Measure"],
+    "Onset Detection Function": ["Onset", "Onsets"],
+    "Root Mean Square": ["Rms"],
+}
+
+# StringMatcher supplies the edit distance used by compareForSimilarities().
+# Prefer the copy bundled inside the python-Levenshtein package so the module
+# can be loaded on any machine; only when that import is unavailable fall back
+# to the original behaviour of executing a source checkout at a fixed path.
+try:
+    from Levenshtein.StringMatcher import StringMatcher
+except ImportError:
+    # NOTE(review): developer-local absolute path, and execfile() exists in
+    # Python 2 only -- adjust the path or install a python-Levenshtein
+    # release that ships StringMatcher.
+    execfile('/Users/alo/Development/python-Levenshtein-0.10.2/StringMatcher.py')
+
+def checkSynonyms( name ):
+    """Map a known synonym onto its canonical feature identifier.
+
+    ``name`` is compared -- case-sensitively, ignoring spaces and hyphens --
+    with every alternative spelling listed in the module-level ``synonyms``
+    table.
+
+    Returns the canonical name (the table key) with spaces and hyphens
+    removed, e.g. ``"SpectralFlux"`` for ``"Flux"``, or ``""`` when no
+    synonym matches.
+    """
+    # Normalise the candidate once instead of once per comparison.
+    wanted = name.replace(' ', '').replace('-', '')
+    for key, syns in synonyms.items():
+        for item in syns:
+            if wanted == item.replace(' ', '').replace('-', ''):
+                # Return straight away: a bare ``break`` would only leave the
+                # inner loop and keep scanning the remaining keys.
+                return key.replace(' ', '').replace('-', '')
+    return ""
+    
+def checkAbbreviations( name ):
+    """Expand an abbreviation into its canonical feature identifier.
+
+    ``name`` is matched against the values of the module-level ``abbr``
+    table; the comparison ignores case, spaces and hyphens.
+
+    Returns the full name (the table key) with spaces and hyphens removed,
+    e.g. ``"ZeroCrossingRate"`` for ``"zcr"``, or ``""`` when ``name`` is
+    not a known abbreviation.
+    """
+    squeezed = name.replace(' ', '').replace('-', '').lower()
+    for full_name, short in abbr.items():
+        if short.replace(' ', '').replace('-', '').lower() != squeezed:
+            continue
+        return full_name.replace(' ', '').replace('-', '')
+    return ""
+    
+
+def loadBase( graph, path, ns=None ):
+    """Parse the RDF document at ``path`` into ``graph`` and hang its classes
+    under ``AudioFeature``.
+
+    After parsing, every subject of a triple whose object is ``owl:Class``
+    receives an ``rdfs:subClassOf`` link to ``ns + 'AudioFeature'``.
+
+    graph -- rdflib graph that is parsed into and extended in place.
+    path  -- source handed unchanged to ``graph.parse``.
+    ns    -- namespace prefix prepended to ``'AudioFeature'``.  Optional only
+             for backward compatibility: when omitted, a global variable
+             named ``ns`` is used, which is what this function silently
+             relied on before the parameter existed.
+
+    Raises NameError when classes were found but no namespace is available.
+    """
+    graph.parse(path)
+
+    # Take a snapshot before adding anything, so the graph is not modified
+    # while one of its own query generators is still being consumed.
+    classes = [su for su, pr in graph.subject_predicates(OWL.Class)]
+    if not classes:
+        return
+
+    if ns is None:
+        ns = globals().get('ns')
+        if ns is None:
+            raise NameError(
+                "global name 'ns' is not defined; pass ns=... to loadBase()")
+
+    parent = URIRef(ns+'AudioFeature')
+    for su in classes:
+        graph.add((su, RDFS.subClassOf, parent))
+
+def addBaseTriples( graph, ns ):
+    """Seed ``graph`` with the top-level classes of the feature vocabulary.
+
+    Three triples are added, each URI being ``ns`` followed by a local name:
+    ``Signal`` and ``Feature`` are typed as ``owl:Class`` and ``AudioFeature``
+    is declared an ``rdfs:subClassOf`` ``Signal``.
+    """
+    signal = URIRef(ns+'Signal')
+    base_triples = (
+        (signal, RDF.type, OWL.Class),
+        (URIRef(ns+'Feature'), RDF.type, OWL.Class),
+        # NOTE(review): AudioFeature is attached to Signal, not to Feature --
+        # confirm this is the intended hierarchy.
+        (URIRef(ns+'AudioFeature'), RDFS.subClassOf, signal),
+    )
+    for triple in base_triples:
+        graph.add(triple)
+
+    
+def addTriplesFromFile( graph, path, ns ):
+    """Import the resources described in the RDF file at ``path`` into ``graph``.
+
+    The file is parsed into a temporary graph.  Each subject typed
+    ``rdfs:Resource`` there becomes an ``owl:Class`` in ``graph`` that is a
+    subclass of ``ns + 'AudioFeature'``.  The class identifier is derived
+    from the last '/'-separated segment of the subject: checkSynonyms() is
+    tried first, then checkAbbreviations(), and if neither recognises the
+    name its spaces and hyphens are simply removed.
+
+    Every statement about the subject is copied onto the new class, except
+    those whose object is ``rdfs:Resource``.  Finally a ``computedIn``
+    literal is attached: the file name of ``path`` with its first three and
+    last four characters cut off.
+    """
+    source = Graph()
+    source.parse(path)
+
+    for subject in source.subjects(RDF.type, RDFS.Resource):
+        local_name = subject.split('/')[-1]
+
+        # Both lookups answer "" for "unknown", so ``or`` falls through to
+        # the next strategy exactly when the previous one found nothing.
+        ident = (
+            checkSynonyms(local_name)
+            or checkAbbreviations(local_name)
+            or local_name.replace(' ','').replace('-','')
+        )
+        feature = URIRef(ns + ident)
+
+        graph.add((feature, RDF.type, OWL.Class))
+        graph.add((feature, RDFS.subClassOf, URIRef(ns+'AudioFeature')))
+
+        for predicate, obj in source.predicate_objects(subject):
+            if obj != RDFS.Resource:
+                graph.add((feature, predicate, obj))
+
+        # presumably strips a three-character prefix and a ".ext" suffix
+        # from the file name -- verify against the input naming scheme
+        file_name = path.split('/')[-1]
+        graph.add((feature, URIRef(ns+'computedIn'), Literal(file_name[3:-4])))
+
+
+def compareForSimilarities( graph, ns, threshold=0.75 ):
+    """Link classes in ``graph`` whose local names are nearly identical.
+
+    Every ordered pair of distinct subjects that occur in a triple with
+    object ``owl:Class`` is compared by the last '/'-separated segment of
+    its URI.  The edit distance reported by StringMatcher is divided by the
+    mean length of the two names; when that normalised distance is below
+    ``1 - threshold`` the triple ``(s, ns + 'similarTo', ss)`` is added.
+    Both directions of a similar pair are therefore recorded.
+
+    graph     -- rdflib graph, inspected and extended in place.
+    ns        -- namespace prefix of the ``similarTo`` predicate.
+    threshold -- required similarity between 0 and 1; higher is stricter.
+
+    Pairs in which both local names are empty (URIs ending in '/') are
+    skipped: there is nothing to compare and the normalisation would divide
+    by zero.  The numeric similarity itself is not stored.
+    """
+    # Snapshot (and de-duplicate) the subjects up front, so the graph is not
+    # modified while its own query generators are still being consumed.
+    subjects = set(su for su, pr in graph.subject_predicates(OWL.Class))
+    named = [(su, su.split('/')[-1]) for su in subjects]
+    similar_to = URIRef(ns + 'similarTo')
+
+    for s, it in named:
+        for ss, other in named:
+            if s == ss:
+                continue
+            mean_len = (len(it) + len(other)) / 2.0
+            if mean_len == 0:
+                continue
+            m = StringMatcher()
+            m.set_seqs(it, other)
+            score = float(m.distance()) / mean_len
+            if score < (1 - threshold):
+                graph.add((s, similar_to, ss))
+    
\ No newline at end of file