Mercurial > hg > dml-open-cliopatria
diff dml-cla/python/n3Parser.py @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
line wrap: on
line diff
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 AUTHOR_AFFILIATION

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA

"""Load N3 files into rdflib graphs, tolerating truncated or mis-encoded files."""

from rdflib import Graph
from rdflib.plugins.parsers.notation3 import BadSyntax
import warnings
import codecs
import platform


# Load and parse an n3 file
def get_rdf_graph_from_n3(n3_file_uri):
    """Parse the N3 file at ``n3_file_uri`` and return it as an rdflib Graph.

    The URI is first handed to ``Graph.parse`` directly.  If that fails, the
    file is read from disk (see ``uri2path``) and passed to
    ``parse_potentially_corrupt_n3``:

    * on ``UnicodeDecodeError`` the file is re-read as ISO-8859-1;
    * on ``AssertionError`` or ``BadSyntax`` the file is re-read with the
      default text mode.

    Exceptions raised by the fallback parse or by opening the file are not
    caught here and propagate to the caller.
    """
    graph = Graph()

    try:
        graph.parse(n3_file_uri, format="n3")
    except UnicodeDecodeError:
        # Decoding failed: read the raw file as ISO-8859-1 instead.  The
        # context manager guarantees the handle is closed even if reading
        # raises.
        with codecs.open(uri2path(n3_file_uri), 'r', "iso-8859-1") as n3_file_iso:
            content = n3_file_iso.read()

        # check if n3 is valid and parse
        # repair if necessary
        graph = parse_potentially_corrupt_n3(content)
    except (AssertionError, BadSyntax):
        # The parser rejected the file: retry through the repairing parser.
        with open(uri2path(n3_file_uri), 'r') as n3_file:
            content = n3_file.read()

        graph = parse_potentially_corrupt_n3(content)

    return graph


# can parse truncated n3
def parse_potentially_corrupt_n3(content):
    """Parse N3 text, dropping a trailing incomplete entry if necessary.

    ``content`` is treated as complete when a '.' occurs within its last four
    characters.  Otherwise everything from the last occurrence of ':event'
    onwards is discarded (with a warning) before parsing.

    Returns a new rdflib Graph.  Parser errors propagate to the caller.
    """
    feature_graph = Graph()

    # test if file is complete.
    if '.' in content[-4:]:
        feature_graph.parse(data=content, format="n3")
        return feature_graph

    # we find the last correct event
    lastentry = content.rfind(':event')
    if lastentry == -1:
        # No ':event' marker to cut back to.  Slicing with -1 would silently
        # drop just the final character, so parse the text unchanged and let
        # the parser report any real syntax problem.
        feature_graph.parse(data=content, format="n3")
    else:
        # if not, delete the last corrupted entry
        warnings.warn("Incomplete rdf file, ignoring last entry")
        feature_graph.parse(data=content[:lastentry], format="n3")

    return feature_graph


# returns filepath from url
def uri2path(n3_file_uri):
    """Return the local file path for a ``file://`` URI.

    The argument is converted to a string.  If it starts with 'file://' the
    scheme prefix is removed: 8 characters when ``platform.system()`` contains
    'Win', 7 otherwise (presumably so that 'file:///C:/...' yields 'C:/...'
    on Windows -- confirm against callers).  A value without the 'file://'
    prefix is returned unchanged rather than having its first characters
    cut off.

    NOTE(review): percent-escapes in the URI are not decoded.
    """
    n3_file_uri_str = str(n3_file_uri)

    if not n3_file_uri_str.startswith('file://'):
        # Already a plain path (or some other scheme): nothing to strip.
        return n3_file_uri_str

    if 'Win' in platform.system():
        file_uri_start_index = 8
    else:
        file_uri_start_index = 7

    return n3_file_uri_str[file_uri_start_index:]