Mercurial > hg > dml-open-backendtools
comparison pyspark/n3Parser.py @ 0:e34cf1b6fe09 tip
commit
author | Daniel Wolff |
---|---|
date | Sat, 20 Feb 2016 18:14:24 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e34cf1b6fe09 |
---|---|
1 # Part of DML (Digital Music Laboratory) | |
2 # | |
3 # This program is free software; you can redistribute it and/or | |
4 # modify it under the terms of the GNU General Public License | |
5 # as published by the Free Software Foundation; either version 2 | |
6 # of the License, or (at your option) any later version. | |
7 # | |
8 # This program is distributed in the hope that it will be useful, | |
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
11 # GNU General Public License for more details. | |
12 # | |
13 # You should have received a copy of the GNU General Public | |
14 # License along with this library; if not, write to the Free Software | |
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
16 | |
17 from rdflib import Graph | |
18 from rdflib.plugins.parsers.notation3 import BadSyntax | |
19 import warnings | |
20 import codecs | |
21 import platform | |
22 | |
def get_rdf_graph_from_n3(n3_file_uri):
    """Load and parse an n3 file into an rdflib Graph.

    The URI is first handed to rdflib directly. If that fails, the file is
    read locally and passed to parse_potentially_corrupt_n3, which can
    drop a truncated trailing entry:

    * UnicodeDecodeError: the file is re-read decoded as ISO-8859-1.
    * AssertionError / BadSyntax: the file is re-read with the default
      text mode of open().

    Errors raised while handling the fallback (e.g. the repaired content
    still being invalid n3) propagate to the caller.

    :param n3_file_uri: 'file://' URI of the n3 file; converted to a local
        path with uri2path for the fallback branches.
    :return: the parsed Graph.
    """
    graph = Graph()

    try:
        graph.parse(n3_file_uri, format="n3")
    except UnicodeDecodeError:
        n3_file_str = uri2path(n3_file_uri)
        # 'with' closes the handle even if read() raises; the original
        # left the file open.
        with codecs.open(n3_file_str, 'r', "iso-8859-1") as n3_file_iso:
            content = n3_file_iso.read()

        # check if n3 is valid and parse
        # repair if necessary
        graph = parse_potentially_corrupt_n3(content)
    except (AssertionError, BadSyntax):
        n3_file_str = uri2path(n3_file_uri)
        with open(n3_file_str, 'r') as n3_file:
            content = n3_file.read()

        graph = parse_potentially_corrupt_n3(content)

    return graph
46 | |
def parse_potentially_corrupt_n3(content):
    """Parse n3 text, tolerating a truncated final entry.

    The text is considered complete when a '.' occurs within its last four
    characters. Otherwise a warning is issued and everything from the last
    occurrence of ':event' onwards is discarded before parsing.

    :param content: the n3 document as a string.
    :return: a new Graph holding the parsed statements.
    """
    feature_graph = Graph()

    # test if file is complete.
    if '.' in content[-4:]:
        feature_graph.parse(data=content, format="n3")
        return feature_graph

    # if not, delete the last corrupted entry
    warnings.warn("Incomplete rdf file, ignoring last entry")

    # we find the last correct event
    lastentry = content.rfind(':event')
    if lastentry == -1:
        # No ':event' marker at all: using rfind's -1 as a slice end would
        # silently chop off just the final character. Keep the whole text
        # so the parser sees (and reports on) the real data.
        lastentry = len(content)

    feature_graph.parse(data=content[:lastentry], format="n3")
    return feature_graph
61 | |
def uri2path(n3_file_uri):
    """Return the local file path for a 'file://' URI.

    The 'file://' scheme prefix is removed. On Windows (platform.system()
    containing 'Win') one additional leading '/' is dropped, so that
    'file:///C:/dir/f.n3' becomes 'C:/dir/f.n3'. A value that does not
    start with 'file://' is returned unchanged as a string instead of
    losing its first characters.

    :param n3_file_uri: the URI; any object whose str() form is the URI.
    :return: the path as a string.
    """
    n3_file_uri_str = str(n3_file_uri)

    scheme_prefix = 'file://'
    if not n3_file_uri_str.startswith(scheme_prefix):
        # Not a file URI: nothing to strip.
        return n3_file_uri_str

    n3_file_str = n3_file_uri_str[len(scheme_prefix):]

    # Windows file URIs carry an extra '/' in front of the drive letter.
    if 'Win' in platform.system() and n3_file_str.startswith('/'):
        n3_file_str = n3_file_str[1:]

    return n3_file_str