view dml-cla/python/n3Parser.py @ 0:718306e29690 tip

committing public release
author Daniel Wolff
date Tue, 09 Feb 2016 21:05:06 +0100
parents
children
line wrap: on
line source
# Part of DML (Digital Music Laboratory)
# Copyright 2014-2015 AUTHOR_AFFILIATION
 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

from rdflib import Graph
from rdflib.plugins.parsers.notation3 import BadSyntax
import warnings
import codecs
import platform

# Load and parse an n3 file
def get_rdf_graph_from_n3(n3_file_uri):
    """Load the n3 file referenced by *n3_file_uri* into an rdflib ``Graph``.

    The URI is first handed directly to ``Graph.parse``.  If that raises,
    the file is re-read from the local path derived by ``uri2path`` and its
    text is passed to ``parse_potentially_corrupt_n3``:

    * ``UnicodeDecodeError``: the file is re-read decoded as iso-8859-1.
    * ``AssertionError`` / ``BadSyntax``: the file is re-read with the
      default text mode of ``open``.

    Exceptions raised while reading or re-parsing in a fallback branch are
    not caught here and propagate to the caller.

    Returns the parsed ``Graph``.
    """
    graph = Graph()

    try:
        graph.parse(n3_file_uri, format="n3")
    except UnicodeDecodeError:
        n3_file_str = uri2path(n3_file_uri)
        # Read inside a context manager so the handle is always closed,
        # even when reading fails.
        with codecs.open(n3_file_str, 'r', "iso-8859-1") as n3_file_iso:
            content = n3_file_iso.read()

        # check if n3 is valid and parse
        # repair if necessary
        graph = parse_potentially_corrupt_n3(content)
    except (AssertionError, BadSyntax):
        n3_file_str = uri2path(n3_file_uri)
        with open(n3_file_str, 'r') as n3_file:
            content = n3_file.read()

        graph = parse_potentially_corrupt_n3(content)

    return graph

# can parse truncated n3
def parse_potentially_corrupt_n3(content):
    """Parse n3 text into a new ``Graph``, dropping a truncated last entry.

    The text is treated as complete when a '.' occurs within its last four
    non-whitespace-trailing characters.  Otherwise everything from the last
    occurrence of ':event' onwards is discarded, a warning is issued, and
    the remaining prefix is parsed.

    content -- the n3 document as a string.

    Returns the parsed ``Graph``.  Parser errors raised by ``Graph.parse``
    are not caught here.
    """
    feature_graph = Graph()

    # Ignore trailing whitespace when looking for the final '.', so that a
    # complete file ending in several blank lines is not mistaken for a
    # truncated one (which would silently drop its last valid entry).
    tail = content.rstrip()[-4:]
    if '.' in tail:
        feature_graph.parse(data=content, format="n3")
        return feature_graph

    # we find the last correct event
    lastentry = content.rfind(':event')
    if lastentry == -1:
        # No ':event' marker, so there is no entry boundary to cut at.
        # Slicing with -1 would only chop one arbitrary character, so the
        # text is parsed unchanged and the parser decides whether it is valid.
        feature_graph.parse(data=content, format="n3")
        return feature_graph

    warnings.warn("Incomplete rdf file, ignoring last entry")
    feature_graph.parse(data=content[:lastentry], format="n3")
    return feature_graph

# returns filepath from url
def uri2path(n3_file_uri):
    """Return the file path contained in a 'file://' URI.

    n3_file_uri -- a string, or any object whose string form is the URI.

    When the string form starts with 'file://', the leading 7 characters
    are removed (8 when ``platform.system()`` contains 'Win').  A value
    without that prefix is returned unchanged as a string instead of
    having its first characters cut off.

    No percent-decoding or separator conversion is performed.
    """
    n3_file_uri_str = str(n3_file_uri)

    # Only strip the scheme when it is actually there; slicing blindly
    # would mangle a plain path such as '/tmp/a.n3'.
    if not n3_file_uri_str.startswith('file://'):
        return n3_file_uri_str

    if 'Win' in platform.system():
        # One extra character is dropped on Windows -- presumably the '/'
        # in front of the drive letter in 'file:///C:/...'; TODO confirm.
        file_uri_start_index = 8
    else:
        file_uri_start_index = 7

    return n3_file_uri_str[file_uri_start_index:]