Chris@0
|
1 /* $Id$
|
Chris@0
|
2
|
Chris@0
|
3 Part of SWI-Prolog
|
Chris@0
|
4
|
Chris@0
|
5 Author: Jan Wielemaker
|
Chris@0
|
6 E-mail: jan@swi.psy.uva.nl
|
Chris@0
|
7 WWW: http://www.swi-prolog.org
|
Chris@0
|
8 Copyright (C): 1985-2004, University of Amsterdam
|
Chris@0
|
9
|
Chris@0
|
10 This program is free software; you can redistribute it and/or
|
Chris@0
|
11 modify it under the terms of the GNU General Public License
|
Chris@0
|
12 as published by the Free Software Foundation; either version 2
|
Chris@0
|
13 of the License, or (at your option) any later version.
|
Chris@0
|
14
|
Chris@0
|
15 This program is distributed in the hope that it will be useful,
|
Chris@0
|
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Chris@0
|
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Chris@0
|
18 GNU General Public License for more details.
|
Chris@0
|
19
|
Chris@0
|
20 You should have received a copy of the GNU Lesser General Public
|
Chris@0
|
21 License along with this library; if not, write to the Free Software
|
Chris@0
|
22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
Chris@0
|
23
|
Chris@0
|
24 As a special exception, if you link this library with other files,
|
Chris@0
|
25 compiled with a Free Software compiler, to produce an executable, this
|
Chris@0
|
26 library does not by itself cause the resulting executable to be covered
|
Chris@0
|
27 by the GNU General Public License. This exception does not however
|
Chris@0
|
28 invalidate any other reasons why the executable file might be covered by
|
Chris@0
|
29 the GNU General Public License.
|
Chris@0
|
30 */
|
Chris@0
|
31
|
Chris@0
|
32 :- module(serql_rdf_turtle,
|
Chris@0
|
33 [ rdf_load_turtle_file/3 % +File, -Triples, +Options
|
Chris@0
|
34 ]).
|
Chris@0
|
35 :- use_module(library(assoc)).
|
Chris@0
|
36 :- use_module(library(option)).
|
Chris@0
|
37 :- use_module(library('semweb/rdf_db')).
|
Chris@0
|
38 :- use_module(library(debug)).
|
Chris@0
|
39 :- use_module(library(url)).
|
Chris@0
|
40
|
Chris@0
|
41 /* NOTE: must be changed to load library('semweb/rdf_turtle'))!!!
|
Chris@0
|
42 */
|
Chris@0
|
43
|
Chris@0
|
44
|
Chris@0
|
45 /** <module> Turtle - Terse RDF Triple Language
|
Chris@0
|
46
|
Chris@0
|
47 This module implements the Turtle language for representing the RDF
|
Chris@0
|
48 triple model as defined by Dave Beckett from the Institute for Learning
|
Chris@0
|
49 and Research Technology University of Bristol in the document:
|
Chris@0
|
50
|
Chris@0
|
51 * http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/
|
Chris@0
|
52
|
Chris@0
|
53 The current parser handles all positive and negative examples provided
|
Chris@0
|
54 by the above document as of October 17, 2004.
|
Chris@0
|
55
|
Chris@0
|
56 @tbd * Much better error handling
|
Chris@0
|
57 * Write turtle data
|
Chris@0
|
58 */
|
Chris@0
|
59
|
Chris@0
|
%%  rdf_load_turtle_file(+Input, -Triples, +Options)
%
%   Read a stream or file into a list of triples of the format
%
%       rdf(Subject, Predicate, Object)
%
%   Input is either stream(Stream) for an already opened input
%   stream, or a file specification that is resolved with
%   absolute_file_name/2 and opened using UTF-8 encoding.  The
%   representation is consistent with the SWI-Prolog RDF/XML
%   and ntriples parsers.  Provided options are:
%
%       * base_uri(+BaseURI)
%       Initial base URI.  Defaults to file://<file> for loading
%       files and to [] when reading from a stream.
%
%       * anon_prefix(+Prefix)
%       Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
%       If Prefix is not an atom blank nodes are generated as
%       node(1), node(2), ...  Defaults to '__bnode' for streams
%       and to '__file://<file>' for files.

rdf_load_turtle_file(stream(In), Triples, Options) :- !,
    option(anon_prefix(Prefix), Options, '__bnode'),
    % Honour base_uri for streams too.  [] stays the default, so
    % callers that pass no base_uri see the previous behaviour.
    option(base_uri(BaseURI), Options, []),
    load_turtle(In, BaseURI, Prefix, Triples).
rdf_load_turtle_file(RelFile, Triples, Options) :-
    absolute_file_name(RelFile, File),
    atom_concat('file://', File, FileURI),
    atom_concat('__', FileURI, DefAnonPrefix),
    option(anon_prefix(Prefix), Options, DefAnonPrefix),
    option(base_uri(BaseURI), Options, FileURI),
    open(File, read, In, [encoding(utf8)]),
    % Always close the file, also if parsing raises an exception.
    call_cleanup(load_turtle(In, BaseURI, Prefix, Triples),
                 close(In)).
|
Chris@0
|
90
|
Chris@0
|
91 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
Chris@0
|
92 The parser is a two-stage processor. The first reads the raw file input
|
Chris@0
|
93 and generates a list of tokens, stripping comments and white space. It
|
Chris@0
|
94 is defined to read a single statement up to its terminating '.'. The
|
Chris@0
|
95 second stage is a traditional DCG parser generating the triples for the
|
Chris@0
|
96 statement.
|
Chris@0
|
97
|
Chris@0
|
98 State:
|
Chris@0
|
99 arg(1) BaseURI
|
Chris@0
|
100 arg(2) Prefix --> URI map
|
Chris@0
|
101 arg(3) NodeID --> URI map
|
Chris@0
|
102 arg(4) AnonPrefix
|
Chris@0
|
103 arg(5) AnonCount
|
Chris@0
|
104 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
Chris@0
|
105
|
Chris@0
|
106
|
Chris@0
|
%%  load_turtle(+In, +BaseURI, +AnonPrefix, -Triples)
%
%   Parse all statements from the stream In into the list Triples.
%   Builds the initial parser state: the given base URI, an empty
%   prefix map, an empty node-id map, the blank node prefix and a
%   blank node counter starting at 1.

load_turtle(In, BaseURI, Prefix, Triples) :-
    empty_assoc(PrefixMap),
    empty_assoc(NodeIdMap),
    phrase(turtle_file(state(BaseURI, PrefixMap, NodeIdMap, Prefix, 1), In),
           Triples).
|
Chris@0
|
112
|
Chris@0
|
113
|
Chris@0
|
%%  turtle_file(+State, +In)//
%
%   Produce the triples of all statements read from In.  Each
%   statement is tokenised with turtle_tokens/2 and parsed by
%   triples//2; the resulting triples are emitted into the output
%   list.  Raises a syntax error if the statement cannot be
%   tokenised (illegal_token) or parsed (cannot_parse).

turtle_file(State, In) -->
    { turtle_statement_tokens(In, Tokens) },
    turtle_statement(Tokens, State, In).

%   Tokenise the next statement or report an illegal token.

turtle_statement_tokens(In, Tokens) :-
    turtle_tokens(In, Tokens), !,
    debug(turtle, 'Tokens: ~w~n', [Tokens]).
turtle_statement_tokens(In, _) :-
    syntax_error(In, illegal_token).

%   Stop at end of file; otherwise emit the triples of this
%   statement and continue with the rest of the input.

turtle_statement(Tokens, _, _) -->
    { Tokens == end_of_file }, !,
    [].
turtle_statement(Tokens, State, In) -->
    { phrase(triples(State, Triples), Tokens) }, !,
    Triples,
    turtle_file(State, In).
turtle_statement(_, _, In) -->
    { syntax_error(In, cannot_parse) }.
|
Chris@0
|
128
|
Chris@0
|
%%  triples(+State, -Triples)//
%
%   Parse the tokens of one statement.  A `@prefix name: <uri>`
%   directive stores the URI in the prefix map (arg 2 of State);
%   `@prefix : <uri>` replaces arg 1 of State.  Both directives
%   yield no triples.  Any other statement is a subject followed by
%   a predicate-object list; the list may be omitted only if the
%   subject itself (a blank node description) produced triples.

triples(State, []) -->
    [ '@', name(prefix) ],
    prefix_label(Label), !,
    uri(State, URI),
    { store_prefix(Label, State, URI) }.
triples(State, Triples) -->
    subject(State, Subject, Triples, Rest),
    (   predicate_object_list(State, Subject, Rest, [])
    ->  []
    ;   { Triples \== Rest }            % [ p o ; ... ] .
    ->  { Rest = [] }
    ).

%   The label part of a @prefix directive.

prefix_label(prefix(Name)) -->
    [ name(Name), ':' ].
prefix_label(default) -->
    [ ':' ].

%   Destructively update State for a prefix declaration.

store_prefix(prefix(Name), State, URI) :-
    arg(2, State, Map0),
    put_assoc(Name, Map0, URI, Map),
    setarg(2, State, Map).
store_prefix(default, State, URI) :-
    setarg(1, State, URI).
|
Chris@0
|
148
|
Chris@0
|
%%  subject(+State, -Subject, ?Triples, ?Tail)//
%
%   Parse the subject of a statement.  A plain resource adds no
%   triples (Triples == Tail); a blank node may add the triples
%   that describe it to the difference list Triples\Tail.

subject(State, Resource, Tail, Tail) -->
    resource(State, Resource), !.
subject(State, BNode, Triples, Tail) -->
    blank(State, BNode, Triples, Tail).
|
Chris@0
|
153
|
Chris@0
|
%%  predicate_object_list(+State, +Subject, ?Triples, ?Tail)//
%
%   Parse `verb objectList (';' (verb objectList)?)*`, adding the
%   triples for Subject to the difference list Triples\Tail.

predicate_object_list(State, Subject, Triples, Tail) -->
    verb(State, Predicate),
    object_list(State, Subject, Predicate, Triples, Tail0),
    more_predicate_objects(State, Subject, Tail0, Tail).

%   After a ';' an optional further predicate-object list follows;
%   without ';' the list ends here.

more_predicate_objects(State, Subject, Triples, Tail) -->
    [';'], !,
    opt_predicate_object_list(State, Subject, Triples, Tail).
more_predicate_objects(_, _, Triples, Tail) -->
    { Triples = Tail }.
|
Chris@0
|
161
|
Chris@0
|
%%  opt_predicate_object_list(+State, +Subject, ?Triples, ?Tail)//
%
%   As predicate_object_list//4, but succeeds without consuming
%   tokens or adding triples if no such list can be parsed.

opt_predicate_object_list(State, Subject, Triples, Tail) -->
    (   predicate_object_list(State, Subject, Triples, Tail)
    ->  []
    ;   { Triples = Tail }
    ).
|
Chris@0
|
166
|
Chris@0
|
%%  object_list(+State, +Subject, +Predicate, ?Triples, ?Tail)//
%
%   Parse a comma separated list of objects.  Each object adds
%   rdf(Subject, Predicate, Object) plus any triples describing the
%   object itself to the difference list Triples\Tail.

object_list(State, Subject, Predicate,
            [rdf(Subject, Predicate, Object)|Triples0], Tail) -->
    object(State, Object, Triples0, Triples1),
    more_objects(State, Subject, Predicate, Triples1, Tail).

%   A ',' introduces the next object; otherwise the list is done.

more_objects(State, Subject, Predicate, Triples, Tail) -->
    [','], !,
    object_list(State, Subject, Predicate, Triples, Tail).
more_objects(_, _, _, Triples, Tail) -->
    { Triples = Tail }.
|
Chris@0
|
174
|
Chris@0
|
%%  verb(+State, -Predicate)//
%
%   Parse the predicate of a triple.  The keyword `a` stands for
%   rdf:type; anything else must be a resource.

verb(State, Predicate) -->
    (   [name(a)]
    ->  { rdf_equal(rdf:type, Predicate) }
    ;   resource(State, Predicate)
    ).
|
Chris@0
|
181
|
Chris@0
|
%%  object(+State, -Object, ?Triples, ?Tail)//
%
%   Parse the object of a triple: a literal, a number (turned into
%   a typed literal using numeric_url/2), a resource or a blank
%   node.  Only a blank node can add triples to Triples\Tail.  If
%   none applies, the remaining tokens are printed to user_error
%   and the rule fails.

object(State, Object, Tail, Tail) -->
    [ literal(Value) ], !,
    { mk_object(Value, State, Object) }.
object(_, literal(type(TypeURI, Text)), Tail, Tail) -->
    [ numeric(Kind, Codes) ], !,
    { numeric_url(Kind, TypeURI),
      atom_codes(Text, Codes)
    }.
object(State, Resource, Tail, Tail) -->
    resource(State, Resource), !.
object(State, BNode, Triples, Tail) -->
    blank(State, BNode, Triples, Tail), !.
object(_, _, _, _) -->
    rest(Remaining),
    { format(user_error, 'Expected object, found: ~p~n', [Remaining]),
      fail
    }.
|
Chris@0
|
200
|
Chris@0
|
%%  rest(-Tokens)//
%
%   Written in expanded DCG form: Tokens is the complete remaining
%   input, which is consumed entirely.

rest(Tokens, Input, Remainder) :-
    Tokens = Input,
    Remainder = [].
|
Chris@0
|
202
|
Chris@0
|
%   Compile-time rewrite of the numeric_url/2 facts: the second
%   argument is passed through rdf_global_id/2 so the stored fact
%   holds the result of that call rather than the term as written.

term_expansion(numeric_url(Kind, Id),
               numeric_url(Kind, Global)) :-
    rdf_global_id(Id, Global).
|
Chris@0
|
206
|
Chris@0
|
%%  numeric_url(?Kind, ?Type)
%
%   Datatype used for numeric tokens of the given Kind.  These
%   facts are rewritten by the term_expansion/2 rule for
%   numeric_url/2 when the file is loaded.

numeric_url(integer, xsd:integer).
numeric_url(decimal, xsd:decimal).
numeric_url(double,  xsd:double).
|
Chris@0
|
210
|
Chris@0
|
%%  resource(+State, -URI)//
%
%   Parse a resource and translate it into a URI:
%
%       * <uri>         resolved by uri//2
%       * :name         arg 1 of State with name appended
%       * prefix:       the URI registered for prefix
%       * prefix:name   the URI registered for prefix with name
%                       appended
%       * :             arg 1 of State
%
%   @error existence_error(prefix, Prefix) if Prefix is not in the
%          prefix map.  Both prefixed forms report this; the
%          `prefix:` form used to fail silently instead.

resource(State, URI) -->
    uri(State, URI), !.
resource(State, URI) -->
    [ ':'(Name) ], !,
    { arg(1, State, Base),
      atom_concat(Base, Name, URI)
    }.
resource(State, URI) -->
    [ name(Prefix), ':' ], !,
    { prefix_uri(State, Prefix, URI) }.
resource(State, URI) -->
    [ Prefix:Name ], !,
    { prefix_uri(State, Prefix, Base),
      atom_concat(Base, Name, URI)
    }.
resource(State, BaseURI) -->
    [ ':' ], !,
    { arg(1, State, BaseURI) }.

%   Look up Prefix in the prefix map (arg 2 of State), raising an
%   existence error if it was never declared.

prefix_uri(State, Prefix, URI) :-
    arg(2, State, Map),
    (   get_assoc(Prefix, Map, URI0)
    ->  URI = URI0
    ;   throw(error(existence_error(prefix, Prefix), _))
    ).
|
Chris@0
|
235
|
Chris@0
|
236
|
Chris@0
|
%%  uri(+State, -URI)//
%
%   Parse a relative_uri(Rel) token and make it global relative to
%   arg 1 of State using global_url/3.  An empty Rel denotes the
%   base itself.

uri(State, URI) -->
    [ relative_uri(Rel) ],
    { arg(1, State, Base),
      absolute_uri(Rel, Base, URI)
    }.

absolute_uri(Rel, Base, URI) :-
    Rel == '', !,                       % must be in global_url?
    URI = Base.
absolute_uri(Rel, Base, URI) :-
    global_url(Rel, Base, URI).
|
Chris@0
|
246
|
Chris@0
|
%%  blank(+State, -Resource, ?Triples, ?Tail)//
%
%   Parse a blank node:
%
%       * nodeId(Id)    the same Id maps to the same Resource, as
%                       remembered in arg 3 of State
%       * []            a fresh anonymous node
%       * [ p o ; ... ] a fresh node described by the enclosed
%                       predicate-object list
%       * ( items )     a collection, see item_list//4
%
%   Triples describing the node go to the difference list
%   Triples\Tail.

blank(State, Resource, Tail, Tail) -->
    [ nodeId(NodeId) ], !,
    { node_id_resource(State, NodeId, Resource) }.
blank(State, Resource, Tail, Tail) -->
    [ '[', ']' ], !,
    { anonid(State, Resource) }.
blank(State, Resource, Triples, Tail) -->
    [ '[' ], !,
    { anonid(State, Resource) },
    predicate_object_list(State, Resource, Triples, Tail),
    [ ']' ].
blank(State, Resource, Triples, Tail) -->
    [ '(' ],
    item_list(State, Resource, Triples, Tail).

%   Reuse the resource already assigned to NodeId or create one
%   and remember it in the node-id map.

node_id_resource(State, NodeId, Resource) :-
    arg(3, State, IdMap),
    (   get_assoc(NodeId, IdMap, Resource)
    ->  true
    ;   anonid(State, NodeId, Resource),
        put_assoc(NodeId, IdMap, Resource, NewIdMap),
        setarg(3, State, NewIdMap)
    ).
|
Chris@0
|
270
|
Chris@0
|
%%  item_list(+State, -Resource, ?Triples, ?Tail)//
%
%   Parse the items of a collection after the opening '('.  The
%   closing ')' yields rdf:nil.  Otherwise a fresh node is created
%   that carries rdf:first (the item) and rdf:rest (the node for the
%   remaining items).

item_list(_State, Nil, Tail, Tail) -->
    [ ')' ], !,
    { rdf_equal(rdf:nil, Nil) }.
item_list(State, Cell, Triples, Tail) -->
    { anonid(State, Cell) },
    object(State, Item, Triples, Triples1),
    { rdf_equal(rdf:first, First),
      rdf_equal(rdf:rest, Rest),
      Triples1 = [ rdf(Cell, First, Item),
                   rdf(Cell, Rest, Next)
                 | Triples2
                 ]
    },
    item_list(State, Next, Triples2, Tail).
|
Chris@0
|
286
|
Chris@0
|
287
|
Chris@0
|
%%  anonid(+State, -Node)
%
%   Create a fresh blank node from the prefix (arg 4) and counter
%   (arg 5) of State and increment the counter destructively.  Node
%   is <Prefix><Count> if the prefix is an atom and node(Count)
%   otherwise.

anonid(State, Node) :-
    arg(4, State, AnonPrefix),
    arg(5, State, Count),
    (   atom(AnonPrefix)
    ->  atom_concat(AnonPrefix, Count, Node)
    ;   Node = node(Count)
    ),
    Next is Count + 1,
    setarg(5, State, Next).
|
Chris@0
|
297
|
Chris@0
|
%%  anonid(+State, +NodeId, -Node)
%
%   Create the blank node for a named node id.  If the anonymous
%   prefix (arg 4 of State) is an atom the node is generated by
%   anonid/2; otherwise it is node(NodeId).

anonid(State, NodeId, Node) :-
    (   arg(4, State, AnonPrefix),
        atom(AnonPrefix)
    ->  anonid(State, Node)
    ;   Node = node(NodeId)
    ).
|
Chris@0
|
303
|
Chris@0
|
%%  mk_object(+Value, +State, -Object)
%
%   Translate the value of a literal token into an RDF object.
%   For typed literals the datatype (prefix:name, <uri> or :name)
%   is expanded to a full URI using State; anything else is
%   wrapped as literal(Value).
%
%   @error existence_error(prefix, Prefix) if the datatype uses a
%          prefix that is not in the prefix map.  This matches the
%          error resource//2 raises for prefix:name; it used to
%          fail silently.

mk_object(type(Prefix:Name, Value), State, literal(type(Type, Value))) :- !,
    arg(2, State, Map),
    (   get_assoc(Prefix, Map, Base)
    ->  atom_concat(Base, Name, Type)
    ;   throw(error(existence_error(prefix, Prefix), _))
    ).
mk_object(type(relative_uri(Rel), Value), State, literal(type(Type, Value))) :- !,
    arg(1, State, Base),
    (   Rel == ''                       % must be in global_url?
    ->  Type = Base
    ;   global_url(Rel, Base, Type)
    ).
mk_object(type(':'(Name), Value), State, literal(type(Type, Value))) :- !,
    arg(1, State, Base),
    atom_concat(Base, Name, Type).
mk_object(Value, _State, literal(Value)).
|
Chris@0
|
318
|
Chris@0
|
319
|
Chris@0
|
320 /*******************************
|
Chris@0
|
321 * TOKENISER *
|
Chris@0
|
322 *******************************/
|
Chris@0
|
323
|
Chris@0
|
%%  turtle_tokens(+In, -List)
%
%   Read one statement from the stream In and return its tokens,
%   without the terminating '.', as a list.  List is the atom
%   end_of_file if the input is exhausted before any token is
%   found.

turtle_tokens(In, List) :-
    get_code(In, C0),
    turtle_token(C0, In, C1, First),
    turtle_first_token(First, C1, In, List).

turtle_first_token(First, _, _, List) :-
    First == end_of_file, !,
    List = end_of_file.
turtle_first_token(First, C1, In, [First|Tokens]) :-
    turtle_tokens(C1, In, Tokens).
|
Chris@0
|
337
|
Chris@0
|
%%  turtle_tokens(+C0, +In, -List)
%
%   Read the remaining tokens of a statement, where C0 is the
%   look-ahead character.  Reading stops at the '.' token, which is
%   not included.  Raises a syntax error for an illegal token or
%   if the input ends inside the statement.

turtle_tokens(C0, In, List) :-
    (   turtle_token(C0, In, C1, Token)
    ->  debug(turtle(token), 'Token: ~q', [Token])
    ;   syntax_error(In, illegal_token)
    ),
    turtle_more_tokens(Token, C1, In, List).

turtle_more_tokens(Token, _, _, List) :-
    Token == '.', !,
    List = [].
turtle_more_tokens(Token, _, In, _) :-
    Token == end_of_file, !,
    syntax_error(In, unexpected_end_of_input).
turtle_more_tokens(Token, C1, In, [Token|Tokens]) :-
    turtle_tokens(C1, In, Tokens).
|
Chris@0
|
350
|
Chris@0
|
%%  turtle_token(+C0, +In, -C, -Token)
%
%   Read the next token, where C0 is the look-ahead character and C
%   is the first character after the token.  Comments and white
%   space are skipped.  Token is one of
%
%       * end_of_file
%       * '.' or another punctuation atom (see punctuation/2)
%       * numeric(Type, Codes)
%       * literal(Text), literal(lang(Lang, Text)) or
%         literal(type(Type, Text))
%       * nodeId(Id)
%       * relative_uri(URI), ':'(Name), ':' (see resource_token/4)
%       * Prefix:Name or name(Name)
%
%   Fails if C0 does not start any token.

turtle_token(-1, _, -1, end_of_file) :- !.
turtle_token(0'., In, C, '.') :- !,
    get_code(In, C).
turtle_token(0'#, In, C, Token) :- !,           % comment
    get_code(In, C1),
    skip_line(C1, In, C2),
    turtle_token(C2, In, C, Token).
turtle_token(Blank, In, C, Token) :-
    turtle_ws(Blank), !,
    get_code(In, C1),
    turtle_token(C1, In, C, Token).
turtle_token(Digit, In, C, Number) :-
    between(0'0, 0'9, Digit), !,
    turtle_number(Digit, In, C, Number).
turtle_token(Sign, In, C, Number) :-            % signed number
    sign(Sign), !,
    turtle_number(Sign, In, C, Number).
turtle_token(0'", In, C, Literal) :- !,
    get_code(In, C1),
    turtle_string(C1, In, C2, Codes),
    atom_codes(Text, Codes),
    turtle_literal_suffix(C2, In, C, Text, Literal).
turtle_token(0'_, In, C, nodeId(NodeID)) :-
    peek_code(In, 0':), !,
    get_code(In, _),                            % skip the ':'
    get_code(In, C1),
    name(C1, In, C, NodeID).
turtle_token(Start, In, C, URI) :-
    memberchk(Start, [0'<, 0':]), !,
    resource_token(Start, In, C, URI).
turtle_token(C0, In, C, Token) :-
    name(C0, In, C1, Name), !,
    (   C1 == 0':,
        \+ sub_atom(Name, 0, _, _, '_'),
        peek_code(In, C2),
        name_start_char(C2)
    ->  get_code(In, C2),
        name(C2, In, C, Local),
        Token = (Name:Local)
    ;   Token = name(Name),
        C = C1
    ).
turtle_token(Code, In, C, Punct) :-
    punctuation(Code, Punct), !,
    get_code(In, C).

%   Complete a string literal: C0 is the character after the
%   closing quote.  `@lang` makes a language tagged literal, `^^`
%   followed by a resource a typed literal and anything else a
%   plain literal.

turtle_literal_suffix(0'@, In, C, Text, Literal) :- !,
    get_code(In, C1),
    language(C1, In, C, LangCodes),
    atom_codes(Lang, LangCodes),
    Literal = literal(lang(Lang, Text)).
turtle_literal_suffix(0'^, In, C, Text, Literal) :-
    peek_code(In, 0'^), !,
    get_code(In, 0'^),
    get_code(In, C1),
    resource_token(C1, In, C, Type),
    Literal = literal(type(Type, Text)).
turtle_literal_suffix(C, _, C, Text, literal(Text)).
|
Chris@0
|
411
|
Chris@0
|
%%  turtle_number(+Char0, +In, -CharNext, -Value)
%
%   Read a number starting at Char0, which is a digit or a sign.
%   Value is numeric(Type, Codes), where Type is integer, decimal
%   or double and Codes holds the character codes of the number
%   including the sign, if any.

turtle_number(Sign, In, CN, numeric(Type, [Sign|Codes])) :-
    sign(Sign), !,
    get_code(In, C0),
    turtle_number_nn(C0, In, CN, numeric(Type, Codes)).
turtle_number(C0, In, CN, Value) :-
    turtle_number_nn(C0, In, CN, Value).
|
Chris@0
|
424
|
Chris@0
|
%%  turtle_number_nn(+Char0, +In, -CharNext, -Value)
%
%   Read an unsigned number.  Value is numeric(Type, Codes) where
%   Type is integer, decimal (digits '.' digits) or double (with
%   exponent).
%
%   A '.' is taken as decimal point only if it is directly
%   followed by a digit or an exponent marker.  Otherwise it is
%   left in the input as the statement terminator, so `:a :b 1.`
%   is read as the integer 1 followed by '.'.  Previously the dot
%   was swallowed into the number and the statement lost its
%   terminator.

turtle_number_nn(C, In, CN, numeric(Type, Codes)) :-
    turtle_integer_codes(C, In, CN0, Codes, T0),        % [0-9]+
    (   CN0 == 0'.,
        peek_code(In, Next),
        fraction_start(Next)
    ->  T0 = [CN0|T1],
        get_code(In, C1),
        turtle_integer_codes(C1, In, CN1, T1, T2),      % [0-9]+.[0-9]*
        (   exponent(CN1, In, CN, T2)
        ->  Type = double
        ;   CN = CN1,
            T2 = [],
            Type = decimal
        )
    ;   exponent(CN0, In, CN, T0)
    ->  Type = double
    ;   T0 = [],
        CN = CN0,
        Type = integer
    ).

%   True if C can follow the decimal point of a number.

fraction_start(C) :-
    between(0'0, 0'9, C), !.
fraction_start(C) :-
    e(C).
|
Chris@0
|
443
|
Chris@0
|
%%  turtle_integer_codes(+C0, +In, -CN, ?Codes, ?Tail)
%
%   Read zero or more decimal digits, starting with the look-ahead
%   character C0, into the difference list Codes\Tail.  CN is the
%   first character that is not a digit.

turtle_integer_codes(C0, In, CN, Codes, Tail) :-
    (   Codes = [C0|Codes1],
        between(0'0, 0'9, C0)
    ->  get_code(In, C1),
        turtle_integer_codes(C1, In, CN, Codes1, Tail)
    ;   CN = C0,
        Tail = Codes
    ).
|
Chris@0
|
449
|
Chris@0
|
%%  exponent(+C0, +In, -CN, -Codes)
%
%   Read the exponent part of a number: `e` or `E`, an optional
%   sign and digits.  Codes is the closed list of codes read and CN
%   the first character after them.  Fails if C0 is not an exponent
%   marker.

exponent(Marker, In, CN, [Marker|Codes]) :-
    e(Marker), !,
    get_code(In, C1),
    optional_sign(C1, In, C2, Codes, Digits),
    turtle_integer_codes(C2, In, CN, Digits, []).
|
Chris@0
|
455
|
Chris@0
|
%%  optional_sign(+C0, +In, -CN, ?Codes, ?Tail)
%
%   If C0 is a sign, add it to the difference list Codes\Tail and
%   read the next character into CN.  Otherwise nothing is read.

optional_sign(C0, In, CN, Codes, Tail) :-
    (   Codes = [C0|Tail],
        sign(C0)
    ->  get_code(In, CN)
    ;   CN = C0,
        Tail = Codes
    ).
|
Chris@0
|
460
|
Chris@0
|
%%  e(?Code)
%
%   Code is an exponent marker: `e` or `E`.

e(Code) :-
    member(Code, [0'e, 0'E]).
|
Chris@0
|
463
|
Chris@0
|
%%  sign(?Code)
%
%   Code is a sign character: `-` or `+`.

sign(Code) :-
    member(Code, [0'-, 0'+]).
|
Chris@0
|
466
|
Chris@0
|
%%  turtle_string(+C0, +In, -C, -Codes)
%
%   Read the body of a string whose opening quote has been read.
%   C0 is the look-ahead character, Codes the content with escape
%   sequences translated by string_escape/4 and C the character
%   after the closing quote.  Raises a syntax error if the input
%   ends inside the string.

turtle_string(-1, In, _, []) :- !,
    syntax_error(In, unexpected_end_of_input).
turtle_string(0'", In, Next, []) :- !,
    get_code(In, Next).
turtle_string(0'\\, In, Next, [Code|Codes]) :- !,
    get_code(In, Escape),
    string_escape(Escape, In, C1, Code),
    turtle_string(C1, In, Next, Codes).
turtle_string(Code, In, Next, [Code|Codes]) :-
    get_code(In, C1),
    turtle_string(C1, In, Next, Codes).
|
Chris@0
|
479
|
Chris@0
|
480
|
Chris@0
|
%%  string_escape(+Escape, +In, -C, -Code)
%
%   Translate the escape sequence introduced by a backslash.
%   Escape is the character after the backslash, Code the
%   character it denotes and C the first character after the
%   sequence.  Handles \n, \", \\, \t, \r, \uXXXX and \UXXXXXXXX;
%   fails for any other character.

string_escape(Escape, In, C, Code) :-
    string_escape_code(Escape, Code0), !,
    Code = Code0,
    get_code(In, C).
string_escape(0'u, In, C, Code) :- !,
    get_hhhh(In, Code),
    get_code(In, C).
string_escape(0'U, In, C, Code) :- !,
    get_hhhh(In, High),
    get_hhhh(In, Low),
    Code is High << 16 + Low,
    get_code(In, C).

%   Single character escapes and the code they stand for.

string_escape_code(0'n,  0'\n).
string_escape_code(0'",  0'").
string_escape_code(0'\\, 0'\\).
string_escape_code(0't,  0'\t).
string_escape_code(0'r,  0'\r).
|
Chris@0
|
499
|
Chris@0
|
%%  get_hhhh(+In, -Code)
%
%   Read four hexadecimal digits from In and combine them, most
%   significant digit first, into the integer Code.  Fails if one
%   of the four characters is not a hexadecimal digit.

get_hhhh(In, Code) :-
    hex_digit(In, D1),
    hex_digit(In, D2),
    hex_digit(In, D3),
    hex_digit(In, D4),
    Code is ((D1*16 + D2)*16 + D3)*16 + D4.

hex_digit(In, Weight) :-
    get_code(In, C),
    code_type(C, xdigit(Weight)).
|
Chris@0
|
506
|
Chris@0
|
%%  language(+C0, +In, -C, -Codes)
%
%   Read a language tag: [a-z]+ ('-' [a-z0-9]+ )*
%   C0 is the first character of the tag, Codes the codes of the
%   complete tag and C the first character after it.

language(First, In, Next, [First|Codes]) :-
    code_type(First, lower),
    get_code(In, C1),
    lwr_word(C1, In, C2, Codes, SubTags),
    sub_langs(C2, In, Next, SubTags, []).
|
Chris@0
|
513
|
Chris@0
|
%%  lwr_word(+C0, +In, -C, ?Codes, ?Tail)
%
%   Read zero or more lowercase letters, starting with the
%   look-ahead character C0, into the difference list Codes\Tail.
%   C is the first character that is not a lowercase letter.

lwr_word(C0, In, C, Codes, Tail) :-
    (   Codes = [C0|Codes1],
        code_type(C0, lower)
    ->  get_code(In, C1),
        lwr_word(C1, In, C, Codes1, Tail)
    ;   C = C0,
        Tail = Codes
    ).
|
Chris@0
|
519
|
Chris@0
|
%%  sub_langs(+C0, +In, -C, ?Codes, ?Tail)
%
%   Read zero or more sub tags of a language tag, each a '-'
%   followed by at least one lowercase letter or digit, into the
%   difference list Codes\Tail.  Fails if a '-' is not followed by
%   such a character.

sub_langs(0'-, In, Next, [0'-, First|Codes], Tail) :- !,
    get_code(In, First),
    lwrdig(First), !,
    get_code(In, C2),
    lwrdigs(C2, In, C3, Codes, More),
    sub_langs(C3, In, Next, More, Tail).
sub_langs(Next, _, Next, Tail, Tail).
|
Chris@0
|
527
|
Chris@0
|
%%  lwrdig(+Code)
%
%   True if Code is a lowercase letter or a digit.

lwrdig(Code) :-
    (   code_type(Code, lower)
    ->  true
    ;   code_type(Code, digit)
    ).
|
Chris@0
|
532
|
Chris@0
|
%%  lwrdigs(+C0, +In, -C, ?Codes, ?Tail)
%
%   Read zero or more lowercase letters or digits, starting with
%   the look-ahead character C0, into the difference list
%   Codes\Tail.  C is the first character that is neither.
%
%   The recursive call used to go to lwr_word/5, which accepts
%   letters only, so a sub tag such as `a1b2` was cut short after
%   the first digit.  It now recurses on itself to implement
%   [a-z0-9]+ as the grammar for language tags requires.

lwrdigs(C0, In, C, [C0|T0], T) :-
    lwrdig(C0), !,
    get_code(In, C1),
    lwrdigs(C1, In, C, T0, T).
lwrdigs(C, _, C, T, T).
|
Chris@0
|
538
|
Chris@0
|
%%  resource_token(+C0, +In, -C, -Token)
%
%   Read a resource starting at C0.  Token is
%
%       * relative_uri(URI) for <...>
%       * ':'(Name) for :name, or ':' if no name follows the colon
%       * Prefix:Name for prefix:name, where the prefix does not
%         start with an underscore
%
%   C is the first character after the token.

resource_token(0'<, In, Next, relative_uri(URI)) :- !,
    get_code(In, C1),
    uri_chars(C1, In, Next, Codes),
    atom_codes(URI, Codes).
resource_token(0':, In, Next, Token) :- !,
    get_code(In, C1),
    (   name(C1, In, Next, Local)
    ->  Token = ':'(Local)
    ;   Token = ':',
        Next = C1
    ).
resource_token(C0, In, Next, Prefix:Local) :-
    name(C0, In, C1, Prefix),
    \+ sub_atom(Prefix, 0, _, _, '_'), !,
    C1 == 0':,
    get_code(In, C2),
    name(C2, In, Next, Local).
|
Chris@0
|
557
|
Chris@0
|
558
|
Chris@0
|
%%  uri_chars(+C0, +In, -C, -Codes)
%
%   Read the characters of a <...> URI up to the closing '>'.  C0
%   is the look-ahead character, Codes the content with escape
%   sequences translated by string_escape/4 and C the character
%   after the '>'.
%
%   Reaching end of file before the '>' raises a syntax error, as
%   turtle_string/4 does for strings.  Without this clause the
%   end-of-file code -1 was added to Codes and read again, which
%   never terminates.

uri_chars(-1, In, _, _) :- !,
    syntax_error(In, unexpected_end_of_input).
uri_chars(0'>, In, C, []) :- !,
    get_code(In, C).
uri_chars(0'\\, In, C, [H|T]) :- !,
    get_code(In, C1),
    string_escape(C1, In, C2, H),
    uri_chars(C2, In, C, T).
uri_chars(C0, In, C, [C0|T]) :-
    get_code(In, C1),
    uri_chars(C1, In, C, T).
|
Chris@0
|
568
|
Chris@0
|
%%  name(+C0, +In, -C, -Atom)
%
%   Read a name: a name start character followed by zero or more
%   name characters.  C0 is the look-ahead character, Atom the
%   name and C the first character after it.  Fails if C0 cannot
%   start a name.

name(First, In, Next, Atom) :-
    name_start_char(First),
    get_code(In, C1),
    name_chars(C1, In, Next, Rest),
    atom_codes(Atom, [First|Rest]).
|
Chris@0
|
575
|
Chris@0
|
%%  name_chars(+C0, +In, -C, -Codes)
%
%   Read zero or more name characters, starting with the
%   look-ahead character C0.  C is the first character that is not
%   a name character.

name_chars(C0, In, C, Codes) :-
    (   Codes = [C0|Rest],
        name_char(C0)
    ->  get_code(In, C1),
        name_chars(C1, In, C, Rest)
    ;   C = C0,
        Codes = []
    ).
|
Chris@0
|
581
|
Chris@0
|
%%  name_start_char(+Code)
%
%   True if Code can start a name: it is accepted by
%   code_type(Code, csymf) or lies in one of the ranges listed by
%   name_start_range/2.
%
%   Succeeds at most once.  The former one-clause-per-range
%   definition could succeed several times for a code matched by
%   more than one clause (csymf may overlap the explicit ranges),
%   leaving choice points in callers such as name/4 that read from
%   the stream right after this test.

name_start_char(C) :-
    (   code_type(C, csymf)
    ;   name_start_range(Low, High),
        between(Low, High, C)
    ), !.

%%  name_start_range(?Low, ?High)
%
%   Inclusive code ranges that may start a name.

name_start_range(0xC0,    0xD6).
name_start_range(0xD8,    0xF6).
name_start_range(0xF8,    0x2FF).
name_start_range(0x370,   0x37D).
name_start_range(0x37F,   0x1FFF).
name_start_range(0x200C,  0x200D).
name_start_range(0x2070,  0x218F).
name_start_range(0x2C00,  0x2FEF).
name_start_range(0x3001,  0xD7FF).
name_start_range(0xF900,  0xFDCF).
name_start_range(0xFDF0,  0xFFFD).
name_start_range(0x10000, 0xEFFFF).
|
Chris@0
|
595
|
Chris@0
|
%%  name_char(?Code)
%
%   True if Code can appear in a name after the first character: a
%   name start character, '-', a digit, 0xB7 or a code in one of
%   the ranges 0x0300-0x036F and 0x203F-0x2040.

name_char(C) :-
    (   name_start_char(C)
    ;   C = 0'-
    ;   code_type(C, digit)
    ;   C = 0xB7
    ;   between(0x0300, 0x036F, C)
    ;   between(0x203F, 0x2040, C)
    ).
|
Chris@0
|
602
|
Chris@0
|
%%  punctuation(?Code, ?Atom)
%
%   Atom is the one-character punctuation token for the character
%   code Code.

punctuation(Code, Atom) :-
    member(Atom, ['(', ')', '[', ']', ',', '@', ':', ';']),
    char_code(Atom, Code).
|
Chris@0
|
611
|
Chris@0
|
%%  skip_line(+C0, +In, -C)
%
%   Skip the rest of a comment line.  C0 is the look-ahead
%   character and C the first character after the line end (0xA
%   or 0xD).
%
%   At end of file C is -1.  Without this clause a comment on the
%   last line that lacks a newline kept re-reading the end-of-file
%   code and never terminated.

skip_line(-1, _, -1) :- !.
skip_line(0xA, In, C) :- !,
    get_code(In, C).
skip_line(0xD, In, C) :- !,
    get_code(In, C).
skip_line(_, In, C) :-
    get_code(In, C1),
    skip_line(C1, In, C).
|
Chris@0
|
620
|
Chris@0
|
%%  turtle_ws(?Code)
%
%   Code is a white space character: tab, linefeed, carriage
%   return or space.

turtle_ws(0'\t).
turtle_ws(0'\n).
turtle_ws(0'\r).
turtle_ws(0'\s).
|
Chris@0
|
626
|
Chris@0
|
%%  syntax_error(+Stream, +Which)
%
%   Throw error(syntax_error(Which), Context) for the current
%   position of Stream.  Context is
%
%       file(File, LineNo, LinePos, CharIndex)
%
%   if Stream has a file_name property and otherwise
%
%       stream(Stream, LineNo, LinePos, CharIndex)
%
%   The second form is new: for a stream without file name this
%   predicate used to fail silently instead of raising the error.

syntax_error(Stream, Which) :-
    (   stream_property(Stream, file_name(File))
    ->  Context = file(File, LineNo, LinePos, CharIndex)
    ;   Context = stream(Stream, LineNo, LinePos, CharIndex)
    ),
    line_count(Stream, LineNo),
    line_position(Stream, LinePos),
    character_count(Stream, CharIndex),
    throw(error(syntax_error(Which), Context)).
|
Chris@0
|
634
|
Chris@0
|
635
|
Chris@0
|
636 /*******************************
|
Chris@0
|
637 * HOOK *
|
Chris@0
|
638 *******************************/
|
Chris@0
|
639
|
Chris@0
|
640 :- multifile
|
Chris@0
|
641 rdf_io:load_triples/3,
|
Chris@0
|
642 rdf_io:get_triples/4.
|
Chris@0
|
643
|
Chris@0
|
%   Hook for rdf_io:load_triples/3: parse Turtle data from Input
%   and assert the triples with rdf_assert/4.  The value of the
%   base_uri option (default []) is passed as the last argument of
%   rdf_assert/4.

rdf_io:load_triples(turtle, Input, Options) :- !,
    option(base_uri(DB), Options, []),
    debug(turtle, 'Loading turtle data from ~w', [Input]),
    rdf_load_turtle_file(Input, Triples, Options),
    length(Triples, Count),
    debug(turtle, 'Loaded ~D triples into ~w', [Count, DB]),
    assert_triples(Triples, DB).
|
Chris@0
|
651
|
Chris@0
|
%%  assert_triples(+Triples, +DB)
%
%   Call rdf_assert/4 with DB as last argument for every
%   rdf(S,P,O) in the list Triples, in list order.

assert_triples(Triples, DB) :-
    maplist(assert_triple(DB), Triples).

assert_triple(DB, rdf(S,P,O)) :-
    rdf_assert(S, P, O, DB).
|
Chris@0
|
656
|
Chris@0
|
657
|
Chris@0
|
%   Hook for rdf_io:get_triples/4: parse Turtle data from Input
%   and return the triples as a list without asserting them.

rdf_io:get_triples(turtle, Source, Triples, Options) :- !,
    debug(turtle, 'Loading turtle data from ~w', [Source]),
    rdf_load_turtle_file(Source, Triples, Options).
|