Mercurial > hg > audio-features-catalogue
diff pdfextract/graphDefs.py @ 1:365a37a2fb6c
added files from pdfextract directory
author | nothing@tehis.net |
---|---|
date | Mon, 25 Feb 2013 14:47:41 +0000 |
parents | |
children | 8bd8453e0551 |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/pdfextract/graphDefs.py Mon Feb 25 14:47:41 2013 +0000
# @@ -0,0 +1,139 @@
import rdflib
from rdflib import Graph, RDF, RDFS, plugin, URIRef, Literal, OWL

# Full feature name -> abbreviation.
# checkAbbreviations() compares an incoming name against the *values* of this
# table (ignoring case, spaces and hyphens) and resolves it to the key.
abbr = {
    "Zero Crossing Rate": "ZCR",
    "Mel-scale Frequency Cepstral Coefficients": "MFCC",
    "Linear Predictive Coding": "LPC",
    "Linear Prediction Cepstral Coefficients": "LPCC",
    "Zero Crossing Peak Amplitudes": "ZCPA",
    "Line Spectral Frequencies": "LSF",
    "Short-Time Energy": "STE",
    "Amplitude Descriptor": "AD",
    "Adaptive Time Frequency Transform": "ATFT",
    "Daubechies Wavelet Coefficient Histogram": "DWCH",
    "Spectral Flux": "SF",
    "Group Delay Function": "GDF",
    "Modified Group Delay Function": "MGDF",
    "Spectral Centroid": "SC",
    "Subband Spectral Flux": "SSF",
    "Perceptual Linear Prediction": "PLP",
    "Linear Spectral Pairs": "LSP",
    "Average Magnitude Difference Function": "AMDF",
    "Octave Band Signal Intensity": "OBSI",
    "Root Mean Square": "RMS",
    "Harmonic Pitch Class Profile": "HPCP",
}

# Canonical feature name -> alternative spellings.
# checkSynonyms() compares an incoming name against the listed alternatives
# (case-sensitively, ignoring spaces and hyphens) and resolves it to the key;
# that is why several entries differ from their key only in capitalisation.
synonyms = {
    "Mel-scale Frequency Cepstral Coefficients": [
        "Mel Frequency Cepstral Coefficients",
        "Mel-Frequency Cepstral Coefficients",
        "Coefficients",
        "Mfcc",
    ],
    "Spectral Kurtosis": ["Kurtosis", "Spectral kurtosis"],
    "Spectral Rolloff": ["Rolloff", "Spectral Rolloff Point"],
    "Zero Crossing Rate": ["Zero Crossing", "Zcr", "Zero Crossings"],
    "Spectral Skewness": ["Skewness", "Spectral skewness"],
    "Spectral Flux": ["Flux"],
    "Spectral Centroid": ["Centroid", "Spectral centroid"],
    "Spectral Slope": ["Spectral slope"],
    "Spectral Flatness": ["Spectral Flatness Measure", "Flatness"],
    "Harmonic Spectrum": ["Harmonic spectrum"],
    "Average Magnitude Difference Function": ["Amdf"],
    "AutoCorrelation": ["Autocorrelation"],
    "PeakSpectrum": ["Peak spectrum"],
    "Spectral Spread": ["Spread"],
    "Spectral Crest": ["Spectral Crest Measure"],
    "Onset Detection Function": ["Onset", "Onsets"],
    "Root Mean Square": ["Rms"],
}

# StringMatcher ships with the python-Levenshtein distribution.  Prefer a
# regular import so the module loads on any machine where the package is
# installed; fall back to the original checkout path otherwise.
try:
    from Levenshtein.StringMatcher import StringMatcher
except ImportError:
    # NOTE(review): execfile() exists only on Python 2 and this absolute path
    # is specific to the original author's machine.
    execfile('/Users/alo/Development/python-Levenshtein-0.10.2/StringMatcher.py')


def _squash( text ):
    """Return `text` with every space and hyphen removed."""
    return text.replace(' ', '').replace('-', '')


def checkSynonyms( name ):
    """Resolve `name` to a canonical feature identifier via `synonyms`.

    The comparison ignores spaces and hyphens but is case-sensitive.

    Returns the matching `synonyms` key with spaces and hyphens removed, or
    "" when no listed synonym matches.
    """
    wanted = _squash(name)
    for key, syns in synonyms.items():
        for item in syns:
            if wanted == _squash(item):
                # Return straight away: a bare `break` would only leave the
                # inner loop and keep scanning the remaining keys.
                return _squash(key)
    return ""


def checkAbbreviations( name ):
    """Resolve `name` to a canonical feature identifier via `abbr`.

    The comparison ignores case, spaces and hyphens.

    Returns the matching `abbr` key with spaces and hyphens removed, or ""
    when `name` is not a known abbreviation.
    """
    wanted = _squash(name).lower()
    for key, ab in abbr.items():
        if wanted == _squash(ab).lower():
            return _squash(key)
    return ""


def loadBase( graph, path, ns=None ):
    """Parse `path` into `graph` and put its classes under AudioFeature.

    Every subject that has a triple whose object is owl:Class becomes an
    rdfs:subClassOf `ns` + 'AudioFeature'.

    graph -- rdflib Graph to load into (modified in place)
    path  -- source handed to graph.parse()
    ns    -- namespace prefix string; when omitted, a module-level `ns` is
             used if one exists (the only way the two-argument call could
             ever have worked), otherwise NameError is raised
    """
    graph.parse(path)
    # Snapshot the subjects first so the graph is not modified while a
    # generator over it is still being consumed.
    class_subjects = [su for su, pr in graph.subject_predicates(OWL.Class)]
    if not class_subjects:
        return
    if ns is None:
        try:
            ns = globals()['ns']
        except KeyError:
            raise NameError("name 'ns' is not defined; pass ns= to loadBase()")
    audio_feature = URIRef(ns + 'AudioFeature')
    for su in class_subjects:
        graph.add((su, RDFS.subClassOf, audio_feature))


def addBaseTriples( graph, ns ):
    """Add the base vocabulary to `graph`.

    Declares `ns`+'Signal' and `ns`+'Feature' as owl:Class and makes
    `ns`+'AudioFeature' an rdfs:subClassOf `ns`+'Signal'.
    """
    # NOTE(review): 'Feature' is declared but not otherwise used here, and
    # 'AudioFeature' is never typed as owl:Class -- confirm this is intended.
    graph.add((URIRef(ns + 'Signal'), RDF.type, OWL.Class))
    graph.add((URIRef(ns + 'Feature'), RDF.type, OWL.Class))
    graph.add((URIRef(ns + 'AudioFeature'), RDFS.subClassOf, URIRef(ns + 'Signal')))


def addTriplesFromFile( graph, path, ns ):
    """Merge the rdfs:Resource subjects of the file at `path` into `graph`.

    Each subject typed rdfs:Resource in the file is renamed to a canonical
    identifier (synonym table first, then abbreviation table, then its own
    local name with spaces and hyphens removed) and added under `ns` as an
    owl:Class that is an rdfs:subClassOf `ns`+'AudioFeature'.  All of the
    subject's other triples are copied across, and a `ns`+'computedIn'
    literal derived from the file name is attached.

    graph -- rdflib Graph to add to (modified in place)
    path  -- source handed to Graph.parse(); also supplies the computedIn tag
    ns    -- namespace prefix string
    """
    loc = Graph()
    loc.parse(path)

    audio_feature = URIRef(ns + 'AudioFeature')
    computed_in = URIRef(ns + 'computedIn')
    # File name minus its first three and last four characters
    # (presumably a fixed prefix and the extension -- TODO confirm).
    source_tag = Literal(path.split('/')[-1][3:-4])

    for su in loc.subjects(RDF.type, RDFS.Resource):
        name = su.split('/')[-1]
        # First non-empty result wins; both lookups return "" on a miss.
        ids = checkSynonyms(name) or checkAbbreviations(name) or _squash(name)
        feature = URIRef(ns + ids)

        graph.add((feature, RDF.type, OWL.Class))
        graph.add((feature, RDFS.subClassOf, audio_feature))
        for pr, ob in loc.predicate_objects(su):
            # Skip triples pointing at rdfs:Resource (this drops the
            # rdf:type rdfs:Resource marker that selected the subject).
            if ob != RDFS.Resource:
                graph.add((feature, pr, ob))

        graph.add((feature, computed_in, source_tag))


def compareForSimilarities( graph, ns, threshold=0.75 ):
    """Link classes in `graph` whose local names are nearly identical.

    For every ordered pair of distinct subjects that have a triple with
    object owl:Class, the StringMatcher distance between the last '/'
    segments of their URIs is divided by the mean length of the two
    segments.  When that score is below 1 - `threshold`, the triple
    (s, `ns`+'similarTo', ss) is added.

    graph     -- rdflib Graph (modified in place)
    ns        -- namespace prefix string for the similarTo predicate
    threshold -- similarity in [0, 1] required to link two classes
    """
    similar_to = URIRef(ns + 'similarTo')
    limit = 1 - threshold
    # Snapshot subjects and their local names once: this avoids mutating the
    # graph while iterating over it and recomputing the names per pair.
    named = [(s, s.split('/')[-1]) for s, p in graph.subject_predicates(OWL.Class)]

    for s, it in named:
        for ss, other in named:
            if s == ss:
                continue
            mean_len = (len(it) + len(other)) / 2.0
            if mean_len == 0:
                # Both URIs end in '/': there is nothing to compare, and
                # dividing would raise ZeroDivisionError.
                continue
            m = StringMatcher()
            m.set_seqs(it, other)
            score = float(m.distance()) / mean_len
            if score < limit:
                graph.add((s, similar_to, ss))
                # Disabled in the original: record the numeric similarity.
                #graph.add((s, URIRef(ns + 'similarity'), Literal(1.0-score)))

# \ No newline at end of file