Mercurial > hg > dml-open-cliopatria
view dml-cla/python/n3Parser.py @ 0:718306e29690 tip
commiting public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 AUTHOR_AFFILIATION
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

import codecs
import platform
import warnings

from rdflib import Graph
from rdflib.plugins.parsers.notation3 import BadSyntax


def get_rdf_graph_from_n3(n3_file_uri):
    """Load and parse an n3 file into an rdflib ``Graph``.

    The URI is first handed directly to ``Graph.parse``.  If that fails,
    the file is read manually and passed to
    ``parse_potentially_corrupt_n3``, which tolerates a truncated file:

    * ``UnicodeDecodeError``: the file is re-read as iso-8859-1.
    * ``AssertionError`` / ``BadSyntax``: the file is re-read with the
      platform default encoding.

    :param n3_file_uri: location of the n3 file; the fallback paths
        convert it to a local path with ``uri2path``.
    :return: the parsed ``Graph``.
    """
    graph = Graph()
    try:
        graph.parse(n3_file_uri, format="n3")
    except UnicodeDecodeError:
        # Retry with an explicit single-byte encoding; the context manager
        # guarantees the handle is closed even if read() raises.
        with codecs.open(uri2path(n3_file_uri), 'r', "iso-8859-1") as n3_file:
            content = n3_file.read()
        # check if n3 is valid and parse; repair if necessary
        graph = parse_potentially_corrupt_n3(content)
    except (AssertionError, BadSyntax):
        with open(uri2path(n3_file_uri), 'r') as n3_file:
            content = n3_file.read()
        graph = parse_potentially_corrupt_n3(content)
    return graph


def parse_potentially_corrupt_n3(content):
    """Parse n3 text, dropping a truncated final entry if there is one.

    The text is treated as complete when a ``.`` occurs within its last
    four characters.  Otherwise a warning is issued and everything from
    the last occurrence of ``:event`` onwards is discarded before parsing.

    :param content: the n3 document as a string.
    :return: a ``Graph`` holding the parsed statements.
    """
    feature_graph = Graph()

    # test if file is complete.
    # if not, delete the last corrupted entry
    if '.' not in content[-4:]:
        warnings.warn("Incomplete rdf file, ignoring last entry")
        # we find the last correct event
        lastentry = content.rfind(':event')
        if lastentry != -1:
            content = content[:lastentry]
        # When no ':event' marker exists there is nothing to cut at:
        # rfind() returns -1 and slicing with it would merely drop the
        # final character, so the content is parsed unchanged instead.

    feature_graph.parse(data=content, format="n3")
    return feature_graph


def uri2path(n3_file_uri):
    """Return the file path for a ``file://`` URI.

    The leading ``file://`` is removed; when ``platform.system()``
    contains ``'Win'`` one extra character is removed as well (the slash
    in front of the drive letter in ``file:///C:/...``).  A value that
    does not start with ``file://`` is returned unchanged.

    :param n3_file_uri: the URI (any object convertible with ``str``).
    :return: the path as a string.
    """
    n3_file_uri_str = str(n3_file_uri)

    # Only strip the scheme when it is actually there; blindly slicing a
    # plain path would cut off the start of the path itself.
    if not n3_file_uri_str.startswith('file://'):
        return n3_file_uri_str

    if 'Win' in platform.system():
        file_uri_start_index = 8
    else:
        file_uri_start_index = 7

    return n3_file_uri_str[file_uri_start_index:]