Mercurial > hg > dml-open-cliopatria
comparison dml-cla/python/n3Parser.py @ 0:718306e29690 tip
committing public release
author | Daniel Wolff |
---|---|
date | Tue, 09 Feb 2016 21:05:06 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:718306e29690 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # Copyright 2014-2015 AUTHOR_AFFILIATION | |
3 | |
4 # This program is free software; you can redistribute it and/or | |
5 # modify it under the terms of the GNU General Public License | |
6 # as published by the Free Software Foundation; either version 2 | |
7 # of the License, or (at your option) any later version. | |
8 # | |
9 # This program is distributed in the hope that it will be useful, | |
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 # GNU General Public License for more details. | |
13 # | |
14 # You should have received a copy of the GNU General Public | |
15 # License along with this library; if not, write to the Free Software | |
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | |
18 from rdflib import Graph | |
19 from rdflib.plugins.parsers.notation3 import BadSyntax | |
20 import warnings | |
21 import codecs | |
22 import platform | |
23 | |
24 # Load and parse an n3 file | |
def get_rdf_graph_from_n3(n3_file_uri):
    """Load the n3 file at *n3_file_uri* and return it as an rdflib Graph.

    The URI is first handed to ``Graph.parse`` directly.  Two fallbacks
    exist when that fails:

    * ``UnicodeDecodeError`` -- the file is re-read as ISO-8859-1 text.
    * ``AssertionError`` / ``BadSyntax`` -- the file is re-read with the
      built-in ``open`` in text mode.

    In both fallbacks the text is passed to
    ``parse_potentially_corrupt_n3`` and the graph it returns replaces the
    one created here.  Exceptions raised inside a fallback (I/O errors,
    a second parse failure) propagate to the caller.

    n3_file_uri -- location of the n3 file; ``uri2path`` is applied to it
                   in the fallbacks to obtain a local file path.
    """
    graph = Graph()

    try:
        graph.parse(n3_file_uri, format="n3")
    except UnicodeDecodeError:
        # The direct parse could not decode the file, so read it as
        # ISO-8859-1 instead.  The context manager guarantees the handle
        # is closed even if reading fails.
        with codecs.open(uri2path(n3_file_uri), 'r', "iso-8859-1") as n3_file_iso:
            content = n3_file_iso.read()
        # check if n3 is valid and parse; repair if necessary
        graph = parse_potentially_corrupt_n3(content)
    except (AssertionError, BadSyntax):
        # The parser rejected the file; retry from the raw text so that a
        # truncated last entry can be dropped.
        with open(uri2path(n3_file_uri), 'r') as n3_file:
            content = n3_file.read()
        graph = parse_potentially_corrupt_n3(content)

    return graph
47 | |
48 # can parse truncated n3 | |
def parse_potentially_corrupt_n3(content):
    """Parse n3 text into a new Graph, tolerating a truncated final entry.

    content -- the n3 document as a string.

    The text is treated as complete when a '.' occurs within its last four
    characters.  Otherwise a warning is issued and everything from the last
    occurrence of ':event' onwards is discarded before parsing.  When the
    text contains no ':event' at all there is no known cut point, so it is
    parsed unchanged.

    Returns the populated Graph.  Parser errors propagate to the caller.
    """
    feature_graph = Graph()

    # test if file is complete; if not, delete the last corrupted entry
    if '.' not in content[-4:]:
        warnings.warn("Incomplete rdf file, ignoring last entry")
        # we find the last correct event
        lastentry = content.rfind(':event')
        # rfind() yields -1 when nothing matches; slicing with -1 would
        # merely chop off the final character, so only cut on a real hit.
        if lastentry != -1:
            content = content[:lastentry]

    feature_graph.parse(data=content, format="n3")
    return feature_graph
62 | |
63 # returns filepath from url | |
def uri2path(n3_file_uri):
    """Return the file path addressed by a 'file://' URI.

    n3_file_uri -- the URI, as a string or any object whose string form
                   is the URI.

    When the string form starts with 'file://', a fixed-length prefix is
    removed: eight characters when platform.system() contains 'Win',
    seven characters otherwise.  Input that does not start with 'file://'
    is returned as its string form, unchanged, instead of losing its
    leading characters.
    """
    n3_file_uri_str = str(n3_file_uri)

    # Nothing to strip: treat the value as a path already.
    if not n3_file_uri_str.startswith('file://'):
        return n3_file_uri_str

    # One extra character is dropped on Windows (e.g. the third slash of
    # 'file:///C:/...').
    if 'Win' in platform.system():
        file_uri_start_index = 8
    else:
        file_uri_start_index = 7

    return n3_file_uri_str[file_uri_start_index:]