annotate jamendo/sparql-archived/SeRQL/rdf_turtle.pl @ 27:d95e683fbd35 tip

Enable CORS on urispace redirects as well
author Chris Cannam
date Tue, 20 Feb 2018 14:52:02 +0000
parents df9685986338
children
rev   line source
Chris@0 1 /* $Id$
Chris@0 2
Chris@0 3 Part of SWI-Prolog
Chris@0 4
Chris@0 5 Author: Jan Wielemaker
Chris@0 6 E-mail: jan@swi.psy.uva.nl
Chris@0 7 WWW: http://www.swi-prolog.org
Chris@0 8 Copyright (C): 1985-2004, University of Amsterdam
Chris@0 9
Chris@0 10 This program is free software; you can redistribute it and/or
Chris@0 11 modify it under the terms of the GNU General Public License
Chris@0 12 as published by the Free Software Foundation; either version 2
Chris@0 13 of the License, or (at your option) any later version.
Chris@0 14
Chris@0 15 This program is distributed in the hope that it will be useful,
Chris@0 16 but WITHOUT ANY WARRANTY; without even the implied warranty of
Chris@0 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Chris@0 18 GNU General Public License for more details.
Chris@0 19
Chris@0 20 You should have received a copy of the GNU Lesser General Public
Chris@0 21 License along with this library; if not, write to the Free Software
Chris@0 22 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Chris@0 23
Chris@0 24 As a special exception, if you link this library with other files,
Chris@0 25 compiled with a Free Software compiler, to produce an executable, this
Chris@0 26 library does not by itself cause the resulting executable to be covered
Chris@0 27 by the GNU General Public License. This exception does not however
Chris@0 28 invalidate any other reasons why the executable file might be covered by
Chris@0 29 the GNU General Public License.
Chris@0 30 */
Chris@0 31
Chris@0 32 :- module(serql_rdf_turtle,
Chris@0 33 [ rdf_load_turtle_file/3 % +File, -Triples, +Options
Chris@0 34 ]).
Chris@0 35 :- use_module(library(assoc)).
Chris@0 36 :- use_module(library(option)).
Chris@0 37 :- use_module(library('semweb/rdf_db')).
Chris@0 38 :- use_module(library(debug)).
Chris@0 39 :- use_module(library(url)).
Chris@0 40
Chris@0 41 /* NOTE: must be changed to load library('semweb/rdf_turtle')!
Chris@0 42 */
Chris@0 43
Chris@0 44
Chris@0 45 /** <module> Turtle - Terse RDF Triple Language
Chris@0 46
Chris@0 47 This module implements the Turtle language for representing the RDF
Chris@0 48 triple model as defined by Dave Beckett from the Institute for Learning
Chris@0 49 and Research Technology University of Bristol in the document:
Chris@0 50
Chris@0 51 * http://www.ilrt.bris.ac.uk/discovery/2004/01/turtle/
Chris@0 52
Chris@0 53 The current parser handles all positive and negative examples provided
Chris@0 54 by the above document as of October 17, 2004.
Chris@0 55
Chris@0 56 @tbd * Much better error handling
Chris@0 57 * Write turtle data
Chris@0 58 */
Chris@0 59
%%  rdf_load_turtle_file(+Input, -Triples, +Options)
%
%   Read Turtle data from Input into a list of triples of the form
%
%       rdf(Subject, Predicate, Object)
%
%   Input is either stream(Stream), an already opened input stream
%   that is read but not closed here, or a file specification that is
%   resolved with absolute_file_name/2 and opened with UTF-8 encoding.
%   The representation is consistent with the SWI-Prolog RDF/XML and
%   ntriples parsers.  Provided options are:
%
%     * base_uri(+BaseURI)
%     Initial base URI.  Defaults to file://<file> when loading a
%     file and to [] when reading from a stream.
%
%     * anon_prefix(+Prefix)
%     Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
%     If Prefix is not an atom blank nodes are generated as
%     node(1), node(2), ...  Defaults to '__bnode' for streams and
%     to '__file://<file>' for files.

rdf_load_turtle_file(stream(In), Triples, Options) :- !,
    option(anon_prefix(Prefix), Options, '__bnode'),
    % The documented base_uri option used to be ignored for streams;
    % [] is kept as the default so callers that pass nothing see no change.
    option(base_uri(BaseURI), Options, []),
    load_turtle(In, BaseURI, Prefix, Triples).
rdf_load_turtle_file(RelFile, Triples, Options) :-
    absolute_file_name(RelFile, File),
    atom_concat('file://', File, FileURI),
    atom_concat('__', FileURI, DefAnonPrefix),
    option(anon_prefix(Prefix), Options, DefAnonPrefix),
    option(base_uri(BaseURI), Options, FileURI),
    open(File, read, In, [encoding(utf8)]),
    % call_cleanup/2 closes the file even if parsing fails or throws.
    call_cleanup(load_turtle(In, BaseURI, Prefix, Triples),
                 close(In)).
Chris@0 90
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Parsing is done in two stages.  Stage one (the tokeniser) reads raw
characters from the input and produces the token list of a single
statement, up to its terminating '.', dropping comments and white space.
Stage two is a conventional DCG that turns that token list into triples.

The parser state is a compound term that is updated destructively with
setarg/3:

    arg(1)  BaseURI
    arg(2)  Prefix --> URI map (library(assoc))
    arg(3)  NodeID --> URI map (library(assoc))
    arg(4)  AnonPrefix
    arg(5)  AnonCount (next blank node number, starts at 1)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

%   load_turtle(+In, +BaseURI, +Prefix, -Triples)
%
%   Build the initial parser state and parse all statements from the
%   stream In into the list Triples.

load_turtle(In, BaseURI, Prefix, Triples) :-
    empty_assoc(PrefixMap),
    empty_assoc(NodeIdMap),
    phrase(turtle_file(state(BaseURI, PrefixMap, NodeIdMap, Prefix, 1), In),
           Triples).
Chris@0 112
Chris@0 113
%   turtle_file(+State, +In)//
%
%   Read statements from In one at a time and emit their triples.
%   A statement that cannot be tokenised raises illegal_token; one
%   that tokenises but does not match the grammar raises cannot_parse.

turtle_file(State, In) -->
    { (   turtle_tokens(In, Statement)
      ->  debug(turtle, 'Tokens: ~w~n', [Statement])
      ;   syntax_error(In, illegal_token)
      )
    },
    (   { Statement == end_of_file }
    ->  []
    ;   { phrase(triples(State, StatementTriples), Statement) }
    ->  StatementTriples,               % emit the triples of this statement
        turtle_file(State, In)
    ;   { syntax_error(In, cannot_parse) }
    ).
Chris@0 128
%   triples(+State, -Triples)//
%
%   Parse the tokens of one statement.  The two directive forms
%   produce no triples but update State:
%
%     * `@prefix name: <uri>` adds name --> uri to the prefix map
%     * `@prefix : <uri>` replaces arg 1 of State (the base URI, which
%       is also what `:name` tokens are resolved against)
%
%   Any other statement is a subject followed by a predicate-object
%   list.  The list may be absent only if the subject itself already
%   produced triples, i.e. `[ p o ; ... ] .`

triples(State, []) -->
    [ '@', name(prefix), name(Name), : ], !,
    uri(State, URI),
    { arg(2, State, PrefixMap0),
      put_assoc(Name, PrefixMap0, URI, PrefixMap),
      setarg(2, State, PrefixMap)
    }.
triples(State, []) -->
    [ '@', name(prefix), ':' ], !,
    uri(State, URI),
    { setarg(1, State, URI) }.
triples(State, Triples) -->
    subject(State, Subject, Triples, Rest),
    (   predicate_object_list(State, Subject, Rest, [])
    ->  []
    ;   { Triples \== Rest }            % [ p o ; ... ] .
    ->  { Rest = [] }
    ).
Chris@0 148
%   subject(+State, -Subject, ?Triples0, ?Triples)//
%
%   A subject is a resource, which adds no triples, or a blank node
%   construct, which may add triples to the difference list
%   Triples0-Triples.

subject(State, Subject, Triples, Triples) -->
    resource(State, Subject), !.
subject(State, Subject, Triples0, Triples) -->
    blank(State, Subject, Triples0, Triples).
Chris@0 153
%   predicate_object_list(+State, +Subject, ?Triples, ?Tail)//
%
%   Parse `verb objectList ( ';' verb objectList )*`.  A trailing ';'
%   with nothing after it is accepted.

predicate_object_list(State, Subject, Triples, Tail) -->
    verb(State, Predicate),
    object_list(State, Subject, Predicate, Triples, Rest),
    (   [';']
    ->  opt_predicate_object_list(State, Subject, Rest, Tail)
    ;   { Rest = Tail }
    ).

%   opt_predicate_object_list(+State, +Subject, ?Triples, ?Tail)//
%
%   As predicate_object_list//4, but also matches the empty input.

opt_predicate_object_list(State, Subject, Triples, Tail) -->
    predicate_object_list(State, Subject, Triples, Tail), !.
opt_predicate_object_list(_State, _Subject, Tail, Tail) -->
    [].
Chris@0 166
%   object_list(+State, +Subject, +Predicate, ?Triples, ?Tail)//
%
%   Parse `object ( ',' object )*`, emitting one rdf/3 term per object
%   in front of whatever triples the object itself generates.

object_list(State, Subject, Predicate,
            [rdf(Subject, Predicate, Object)|Triples0], Tail) -->
    object(State, Object, Triples0, Triples1),
    (   [',']
    ->  object_list(State, Subject, Predicate, Triples1, Tail)
    ;   { Triples1 = Tail }
    ).
Chris@0 174
%   verb(+State, -Predicate)//
%
%   A predicate is the keyword `a`, shorthand for rdf:type, or any
%   resource.

verb(_State, Predicate) -->
    [name(a)], !,
    { rdf_equal(rdf:type, Predicate) }.
verb(State, Predicate) -->
    resource(State, Predicate).
Chris@0 181
%   object(+State, -Object, ?Triples0, ?Triples)//
%
%   Parse one object: a literal token, a numeric token (mapped to a
%   typed literal), a resource or a blank node construct.  If none of
%   these match, the remaining tokens are reported on user_error and
%   the rule fails.

object(State, Object, Triples, Triples) -->
    [ literal(Value) ], !,
    { mk_object(Value, State, Object) }.
object(_State, literal(type(TypeURI, Lexical)), Triples, Triples) -->
    [ numeric(Kind, Codes) ], !,
    { numeric_url(Kind, TypeURI),
      atom_codes(Lexical, Codes)
    }.
object(State, Object, Triples, Triples) -->
    resource(State, Object), !.
object(State, Object, Triples0, Triples) -->
    blank(State, Object, Triples0, Triples), !.
object(_, _, _, _) -->
    rest(Remaining),
    { format(user_error, 'Expected object, found: ~p~n', [Remaining]),
      fail
    }.

%   rest(-Tokens)//
%
%   Unify Tokens with all remaining input and consume it.

rest(Tokens, Tokens, []).
Chris@0 202
%   Expand the prefix:local notation in the numeric_url/2 facts below
%   to full URIs when this file is loaded.

term_expansion(numeric_url(Kind, Local),
               numeric_url(Kind, URI)) :-
    rdf_global_id(Local, URI).

%   numeric_url(?Kind, ?Datatype)
%
%   Datatype for each kind of numeric token the tokeniser produces.

numeric_url(integer, xsd:integer).
numeric_url(decimal, xsd:decimal).
numeric_url(double,  xsd:double).
Chris@0 210
%   resource(+State, -URI)//
%
%   Translate a resource token into a URI:
%
%     * `<rel>`        resolved by uri//2
%     * `:name`        arg 1 of State concatenated with name
%     * `prefix:`      the URI registered for prefix (fails if unknown)
%     * `prefix:name`  the URI registered for prefix concatenated with name
%     * `:`            arg 1 of State
%
%   @throws existence_error(prefix, Prefix) for `prefix:name` if the
%           prefix is not declared.

resource(State, URI) -->
    uri(State, URI), !.
resource(State, URI) -->
    [ :(Local) ], !,
    { arg(1, State, Base),
      atom_concat(Base, Local, URI)
    }.
resource(State, URI) -->
    [ name(Prefix), : ], !,
    { arg(2, State, PrefixMap),
      get_assoc(Prefix, PrefixMap, URI)
    }.
resource(State, URI) -->
    [ Prefix:Local ], !,
    { arg(2, State, PrefixMap),
      (   get_assoc(Prefix, PrefixMap, Base)
      ->  atom_concat(Base, Local, URI)
      ;   throw(error(existence_error(prefix, Prefix), _))
      )
    }.
resource(State, BaseURI) -->
    [ : ], !,
    { arg(1, State, BaseURI) }.
Chris@0 235
Chris@0 236
%   uri(+State, -URI)//
%
%   Match a `<...>` token.  The empty reference `<>` denotes the base
%   URI itself; anything else is resolved against the base with
%   global_url/3.

uri(State, URI) -->
    [ relative_uri(Relative) ],
    { arg(1, State, Base),
      (   Relative == ''                % must be in global_url?
      ->  URI = Base
      ;   global_url(Relative, Base, URI)
      )
    }.
Chris@0 246
%   blank(+State, -Resource, ?Triples0, ?Triples)//
%
%   Parse a blank node construct:
%
%     * `_:id`             named blank node; the same id always maps to
%                          the same Resource (map kept in arg 3 of State)
%     * `[]`               fresh anonymous node
%     * `[ p o ; ... ]`    fresh node that is the subject of the
%                          enclosed predicate-object list
%     * `( item ... )`     collection, see item_list//4

blank(State, Resource, Triples, Triples) -->
    [ nodeId(NodeId) ], !,
    { arg(3, State, NodeMap0),
      (   get_assoc(NodeId, NodeMap0, Resource)
      ->  true
      ;   anonid(State, NodeId, Resource),
          put_assoc(NodeId, NodeMap0, Resource, NodeMap),
          setarg(3, State, NodeMap)
      )
    }.
blank(State, Resource, Triples, Triples) -->
    [ '[', ']' ], !,
    { anonid(State, Resource) }.
blank(State, Resource, Triples0, Triples) -->
    [ '[' ], !,
    { anonid(State, Resource) },
    predicate_object_list(State, Resource, Triples0, Triples),
    [ ']' ].
blank(State, Resource, Triples0, Triples) -->
    [ '(' ],
    item_list(State, Resource, Triples0, Triples).
Chris@0 270
%   item_list(+State, -Resource, ?Triples0, ?Triples)//
%
%   Parse the items of a collection up to and including the closing
%   ')'.  The empty list is rdf:nil.  Each item gets a fresh blank
%   node carrying rdf:first (the item) and rdf:rest (the node for the
%   remainder of the list).  Triples produced by the item itself come
%   before the two cell triples.

item_list(_State, Nil, Triples, Triples) -->
    [ ')' ], !,
    { rdf_equal(rdf:nil, Nil) }.
item_list(State, Cell, Triples0, Triples) -->
    { anonid(State, Cell) },
    object(State, Item, Triples0, Triples1),
    { rdf_equal(rdf:first, First),
      rdf_equal(rdf:rest, Rest),
      Triples1 = [ rdf(Cell, First, Item),
                   rdf(Cell, Rest, NextCell)
                 | Triples2
                 ]
    },
    item_list(State, NextCell, Triples2, Triples).
Chris@0 286
Chris@0 287
%   anonid(+State, -Node)
%
%   Create a fresh blank node identifier and increment the counter in
%   arg 5 of State.  If the anon prefix (arg 4) is an atom, Node is
%   <Prefix><Count>; otherwise it is node(Count).

anonid(State, Node) :-
    arg(4, State, AnonPrefix),
    arg(5, State, Count),
    (   atom(AnonPrefix)
    ->  atom_concat(AnonPrefix, Count, Node)
    ;   Node = node(Count)
    ),
    NextCount is Count + 1,
    setarg(5, State, NextCount).

%   anonid(+State, +NodeId, -Node)
%
%   Identifier for the named blank node `_:NodeId`.  With an atomic
%   anon prefix a fresh generated identifier is used; otherwise Node is
%   node(NodeId).

anonid(State, _NodeId, Node) :-
    arg(4, State, AnonPrefix),
    atom(AnonPrefix), !,
    anonid(State, Node).
anonid(_State, NodeId, node(NodeId)).
Chris@0 303
%   mk_object(+Value, +State, -Object)
%
%   Convert the argument of a literal(Value) token into an RDF object.
%   For typed literals the datatype is expanded to a full URI in the
%   same way resource//2 expands resources:
%
%     * type(Prefix:Name, V)        URI registered for Prefix + Name
%     * type(relative_uri(Rel), V)  Rel resolved against the base URI
%     * type(:(Name), V)            base URI + Name
%
%   Any other Value (a plain atom or lang(Lang, Text)) is wrapped in
%   literal/1 unchanged.
%
%   @throws existence_error(prefix, Prefix) if the datatype uses an
%           undeclared prefix.  This used to fail silently, unlike
%           resource//2, which throws for the same condition.

mk_object(type(Prefix:Name, Value), State, literal(type(Type, Value))) :- !,
    arg(2, State, Map),
    (   get_assoc(Prefix, Map, Base)
    ->  atom_concat(Base, Name, Type)
    ;   throw(error(existence_error(prefix, Prefix), _))
    ).
mk_object(type(relative_uri(Rel), Value), State, literal(type(Type, Value))) :- !,
    arg(1, State, Base),
    (   Rel == ''                       % must be in global_url?
    ->  Type = Base
    ;   global_url(Rel, Base, Type)
    ).
mk_object(type(:(Name), Value), State, literal(type(Type, Value))) :- !,
    arg(1, State, Base),
    atom_concat(Base, Name, Type).
mk_object(Value, _State, literal(Value)).
Chris@0 318
Chris@0 319
Chris@0 320 /*******************************
Chris@0 321 * TOKENISER *
Chris@0 322 *******************************/
Chris@0 323
%%  turtle_tokens(+In, -List)
%
%   Read one statement from the stream In and return its tokens as a
%   list, without the terminating '.'.  List is the atom end_of_file
%   if the input holds no further tokens.

turtle_tokens(In, List) :-
    get_code(In, Code0),
    turtle_token(Code0, In, Code1, First),
    (   First == end_of_file
    ->  List = end_of_file
    ;   List = [First|More],
        turtle_tokens(Code1, In, More)
    ).

%   turtle_tokens(+Code0, +In, -List)
%
%   Read the remaining tokens of a statement, given the lookahead
%   character Code0.  Raises illegal_token if no token can be read
%   and unexpected_end_of_input if the input ends before the '.'.

turtle_tokens(Code0, In, List) :-
    (   turtle_token(Code0, In, Code1, Token)
    ->  debug(turtle(token), 'Token: ~q', [Token])
    ;   syntax_error(In, illegal_token)
    ),
    (   Token == '.'
    ->  List = []
    ;   Token == end_of_file
    ->  syntax_error(In, unexpected_end_of_input)
    ;   List = [Token|More],
        turtle_tokens(Code1, In, More)
    ).
Chris@0 350
%   turtle_token(+Code0, +In, -Code, -Token)
%
%   Read a single token.  Code0 is the first, already read, character
%   and Code is the first character following the token.  Tokens are:
%
%     * end_of_file
%     * '.' and the atoms of punctuation/2
%     * numeric(Type, Codes)
%     * literal(Atom), literal(lang(Lang, Atom)), literal(type(Type, Atom))
%     * nodeId(Id)
%     * relative_uri(URI), :(Name), ':', Prefix:Name, name(Name)
%
%   Comments and white space are skipped.  Clause order is significant.

turtle_token(-1, _, -1, end_of_file) :- !.
turtle_token(0'., In, Next, '.') :- !,
    get_code(In, Next).
turtle_token(0'#, In, Next, Token) :- !,           % comment to end of line
    get_code(In, Code1),
    skip_line(Code1, In, Code2),
    turtle_token(Code2, In, Next, Token).
turtle_token(Blank, In, Next, Token) :-
    turtle_ws(Blank), !,
    get_code(In, Code1),
    turtle_token(Code1, In, Next, Token).
turtle_token(Digit, In, Next, Number) :-
    between(0'0, 0'9, Digit), !,
    turtle_number(Digit, In, Next, Number).
turtle_token(0'-, In, Next, Number) :- !,
    turtle_number(0'-, In, Next, Number).
turtle_token(0'+, In, Next, Number) :- !,
    turtle_number(0'+, In, Next, Number).
turtle_token(0'", In, Next, Literal) :- !,
    get_code(In, Code1),
    turtle_string(Code1, In, Code2, TextCodes),
    atom_codes(Text, TextCodes),
    (   Code2 == 0'@                               % "text"@lang
    ->  get_code(In, Code3),
        language(Code3, In, Next, LangCodes),
        atom_codes(Lang, LangCodes),
        Literal = literal(lang(Lang, Text))
    ;   Code2 == 0'^,                              % "text"^^datatype
        peek_code(In, 0'^)
    ->  get_code(In, 0'^),
        get_code(In, Code3),
        resource_token(Code3, In, Next, Type),
        Literal = literal(type(Type, Text))
    ;   Next = Code2,
        Literal = literal(Text)
    ).
turtle_token(0'_, In, Next, nodeId(NodeID)) :-
    peek_code(In, 0':), !,
    get_code(In, _),
    get_code(In, Code1),
    name(Code1, In, Next, NodeID).
turtle_token(0'<, In, Next, URI) :- !,
    resource_token(0'<, In, Next, URI).
turtle_token(0':, In, Next, URI) :- !,
    resource_token(0':, In, Next, URI).
turtle_token(Code0, In, Next, Token) :-
    name(Code0, In, Code1, Name), !,
    (   Code1 == 0':,
        \+ sub_atom(Name, 0, _, _, '_'),
        peek_code(In, Code2),
        name_start_char(Code2)
    ->  get_code(In, Code2),
        name(Code2, In, Next, Local),
        Token = (Name:Local)
    ;   Token = name(Name),
        Next = Code1
    ).
turtle_token(Code0, In, Next, Punct) :-
    punctuation(Code0, Punct), !,
    get_code(In, Next).
Chris@0 411
%%  turtle_number(+Char0, +In, -CharNext, -Value)
%
%   Read a number with an optional leading sign.  Value is
%   numeric(Type, Codes), where Type is one of integer, decimal or
%   double and Codes is the lexical form including the sign.

turtle_number(0'-, In, Next, numeric(Type, [0'-|Codes])) :- !,
    get_code(In, Code0),
    turtle_number_nn(Code0, In, Next, numeric(Type, Codes)).
turtle_number(0'+, In, Next, numeric(Type, [0'+|Codes])) :- !,
    get_code(In, Code0),
    turtle_number_nn(Code0, In, Next, numeric(Type, Codes)).
turtle_number(Code0, In, Next, Value) :-
    turtle_number_nn(Code0, In, Next, Value).

%   turtle_number_nn(+Char0, +In, -CharNext, -Value)
%
%   Read an unsigned number.  Digits followed by '.' give a decimal,
%   or a double if an exponent follows; digits directly followed by an
%   exponent give a double; otherwise the number is an integer.

turtle_number_nn(Code0, In, Next, numeric(Type, Codes)) :-
    turtle_integer_codes(Code0, In, AfterInt, Codes, Tail0),    % [0-9]+
    (   AfterInt == 0'.
    ->  Tail0 = [AfterInt|Tail1],
        get_code(In, Code1),
        turtle_integer_codes(Code1, In, AfterFrac, Tail1, Tail2), % [0-9]+.[0-9]+
        (   exponent(AfterFrac, In, Next, Tail2)
        ->  Type = double
        ;   Next = AfterFrac,
            Tail2 = [],
            Type = decimal
        )
    ;   exponent(AfterInt, In, Next, Tail0)
    ->  Type = double
    ;   Tail0 = [],
        Next = AfterInt,
        Type = integer
    ).
Chris@0 443
%   turtle_integer_codes(+Code0, +In, -Next, -Codes, ?Tail)
%
%   Collect a (possibly empty) run of decimal digits into the
%   difference list Codes-Tail.  Next is the first non-digit.

turtle_integer_codes(Digit, In, Next, [Digit|Codes], Tail) :-
    between(0'0, 0'9, Digit), !,
    get_code(In, Code1),
    turtle_integer_codes(Code1, In, Next, Codes, Tail).
turtle_integer_codes(Next, _, Next, Tail, Tail).

%   exponent(+Code0, +In, -Next, -Codes)
%
%   Read `[eE] [+-]? [0-9]*` into the closed list Codes.  Fails if
%   Code0 is not 'e' or 'E'.

exponent(Marker, In, Next, [Marker|Codes0]) :-
    e(Marker), !,
    get_code(In, Code1),
    optional_sign(Code1, In, Code2, Codes0, Codes1),
    turtle_integer_codes(Code2, In, Next, Codes1, []).

%   optional_sign(+Code0, +In, -Next, -Codes, ?Tail)
%
%   Consume a single '+' or '-' if present.

optional_sign(Sign, In, Next, [Sign|Tail], Tail) :-
    sign(Sign), !,
    get_code(In, Next).
optional_sign(Next, _, Next, Tail, Tail).

%   Exponent markers.
e(0'e).
e(0'E).

%   Sign characters.
sign(0'-).
sign(0'+).
Chris@0 466
%   turtle_string(+Code0, +In, -Next, -Codes)
%
%   Read the body of a double-quoted string, starting after the
%   opening quote, up to and including the closing quote.  Backslash
%   escapes are decoded by string_escape/4.  Raises
%   unexpected_end_of_input if the input ends inside the string.

turtle_string(-1, In, _, []) :- !,
    syntax_error(In, unexpected_end_of_input).
turtle_string(0'", In, Next, []) :- !,
    get_code(In, Next).
turtle_string(0'\\, In, Next, [Decoded|Codes]) :- !,
    get_code(In, Code1),
    string_escape(Code1, In, Code2, Decoded),
    turtle_string(Code2, In, Next, Codes).
turtle_string(Code0, In, Next, [Code0|Codes]) :-
    get_code(In, Code1),
    turtle_string(Code1, In, Next, Codes).
Chris@0 479
Chris@0 480
%   string_escape(+Code0, +In, -Next, -Value)
%
%   Decode the escape sequence whose character after the backslash is
%   Code0.  Supported: \n \" \\ \t \r \uXXXX \UXXXXXXXX.  Fails for
%   any other character.  Next is the character after the sequence.

string_escape(0'n, In, Next, 0'\n) :- !,
    get_code(In, Next).
string_escape(0'", In, Next, 0'") :- !,
    get_code(In, Next).
string_escape(0'\\, In, Next, 0'\\) :- !,
    get_code(In, Next).
string_escape(0't, In, Next, 0'\t) :- !,
    get_code(In, Next).
string_escape(0'r, In, Next, 0'\r) :- !,
    get_code(In, Next).
string_escape(0'u, In, Next, Value) :- !,
    get_hhhh(In, Value),
    get_code(In, Next).
string_escape(0'U, In, Next, Value) :- !,
    get_hhhh(In, High),
    get_hhhh(In, Low),
    Value is High << 16 + Low,          % << binds tighter than + in Prolog
    get_code(In, Next).

%   get_hhhh(+In, -Value)
%
%   Read exactly four hexadecimal digits and return their value.
%   Fails if one of the four characters is not a hex digit.

get_hhhh(In, Value) :-
    get_code(In, H1), code_type(H1, xdigit(V1)),
    get_code(In, H2), code_type(H2, xdigit(V2)),
    get_code(In, H3), code_type(H3, xdigit(V3)),
    get_code(In, H4), code_type(H4, xdigit(V4)),
    Value is V1<<12+V2<<8+V3<<4+V4.
Chris@0 506
%   language(+Code0, +In, -Next, -Codes)
%
%   Read a language tag: [a-z]+ ('-' [a-z0-9]+ )*
%   Code0 is the first character of the tag and must be a lowercase
%   letter.  Next is the first character after the tag.

language(C0, In, C, [C0|Codes]) :-
    code_type(C0, lower),
    get_code(In, C1),
    lwr_word(C1, In, C2, Codes, Tail),
    sub_langs(C2, In, C, Tail, []).

%   lwr_word(+Code0, +In, -Next, -Codes, ?Tail)
%
%   Collect a (possibly empty) run of lowercase letters.

lwr_word(C0, In, C, [C0|T0], T) :-
    code_type(C0, lower), !,
    get_code(In, C1),
    lwr_word(C1, In, C, T0, T).
lwr_word(C, _, C, T, T).

%   sub_langs(+Code0, +In, -Next, -Codes, ?Tail)
%
%   Read zero or more '-' subtags.  Fails if a '-' is not followed by
%   a lowercase letter or digit.

sub_langs(0'-, In, C, [0'-, C1|Codes], T) :- !,
    get_code(In, C1),
    lwrdig(C1), !,
    get_code(In, C2),
    lwrdigs(C2, In, C3, Codes, Tail),
    sub_langs(C3, In, C, Tail, T).
sub_langs(C, _, C, T, T).

%   lwrdig(+Code)
%
%   True if Code is a lowercase letter or a digit.

lwrdig(C) :-
    code_type(C, lower), !.
lwrdig(C) :-
    code_type(C, digit).

%   lwrdigs(+Code0, +In, -Next, -Codes, ?Tail)
%
%   Collect a (possibly empty) run of lowercase letters and digits.
%   The recursive call must be to lwrdigs/5 itself: it used to
%   continue with lwr_word/5, which stopped at the first digit after
%   the second character of a subtag.

lwrdigs(C0, In, C, [C0|T0], T) :-
    lwrdig(C0), !,
    get_code(In, C1),
    lwrdigs(C1, In, C, T0, T).
lwrdigs(C, _, C, T, T).
Chris@0 538
%   resource_token(+Code0, +In, -Next, -Token)
%
%   Read a resource token starting at Code0:
%
%     * `<...>`        relative_uri(URI)
%     * `:name`        :(Name)
%     * `:`            the atom ':' if no name follows
%     * `prefix:name`  Prefix:Name; Prefix must not start with '_'

resource_token(0'<, In, Next, relative_uri(URI)) :- !,
    get_code(In, Code1),
    uri_chars(Code1, In, Next, Codes),
    atom_codes(URI, Codes).
resource_token(0':, In, Next, Token) :- !,
    get_code(In, Code1),
    (   name(Code1, In, Next, Local)
    ->  Token = :(Local)
    ;   Token = :,
        Next = Code1
    ).
resource_token(Code0, In, Next, Prefix:Local) :-
    name(Code0, In, Code1, Prefix),
    \+ sub_atom(Prefix, 0, _, _, '_'), !,
    Code1 == 0':,
    get_code(In, Code2),
    name(Code2, In, Next, Local).
Chris@0 557
Chris@0 558
%   uri_chars(+Code0, +In, -Next, -Codes)
%
%   Read the characters of a `<...>` URI reference up to and including
%   the closing '>'.  Backslash escapes are decoded by string_escape/4.
%   Next is the character after the '>'.
%
%   Raises unexpected_end_of_input if the input ends before the '>'.
%   Without the EOF clause get_code/2 keeps returning -1 and the
%   predicate never terminates on an unterminated URI.

uri_chars(-1, In, _, _) :- !,
    syntax_error(In, unexpected_end_of_input).
uri_chars(0'>, In, C, []) :- !,
    get_code(In, C).
uri_chars(0'\\, In, C, [H|T]) :- !,
    get_code(In, C1),
    string_escape(C1, In, C2, H),
    uri_chars(C2, In, C, T).
uri_chars(C0, In, C, [C0|T]) :-
    get_code(In, C1),
    uri_chars(C1, In, C, T).
Chris@0 568
%   name(+Code0, +In, -Next, -Atom)
%
%   Read a name: one name start character followed by any number of
%   name characters.  Fails if Code0 cannot start a name.

name(Start, In, Next, Atom) :-
    name_start_char(Start),
    get_code(In, Code1),
    name_chars(Code1, In, Next, Codes),
    atom_codes(Atom, [Start|Codes]).

%   name_chars(+Code0, +In, -Next, -Codes)
%
%   Collect a (possibly empty) run of name characters.

name_chars(Code0, In, Next, [Code0|Codes]) :-
    name_char(Code0), !,
    get_code(In, Code1),
    name_chars(Code1, In, Next, Codes).
name_chars(Next, _, Next, []).

%   name_start_char(+Code)
%
%   True if Code may start a name: a character of type csymf or a
%   code point in one of the ranges below.

name_start_char(Code) :- code_type(Code, csymf).
name_start_char(Code) :- between(0xC0, 0xD6, Code).
name_start_char(Code) :- between(0xD8, 0xF6, Code).
name_start_char(Code) :- between(0xF8, 0x2FF, Code).
name_start_char(Code) :- between(0x370, 0x37D, Code).
name_start_char(Code) :- between(0x37F, 0x1FFF, Code).
name_start_char(Code) :- between(0x200C, 0x200D, Code).
name_start_char(Code) :- between(0x2070, 0x218F, Code).
name_start_char(Code) :- between(0x2C00, 0x2FEF, Code).
name_start_char(Code) :- between(0x3001, 0xD7FF, Code).
name_start_char(Code) :- between(0xF900, 0xFDCF, Code).
name_start_char(Code) :- between(0xFDF0, 0xFFFD, Code).
name_start_char(Code) :- between(0x10000, 0xEFFFF, Code).

%   name_char(+Code)
%
%   True if Code may appear after the first character of a name.

name_char(Code) :- name_start_char(Code).
name_char(0'-).
name_char(Code) :- code_type(Code, digit).
name_char(0xB7).
name_char(Code) :- between(0x0300, 0x036F, Code).
name_char(Code) :- between(0x203F, 0x2040, Code).
Chris@0 602
%   punctuation(?Code, ?Token)
%
%   Single-character punctuation tokens and the atoms that represent
%   them in the token list.

punctuation(0'(, '(').
punctuation(0'), ')').
punctuation(0'[, '[').
punctuation(0'], ']').
punctuation(0',, ',').
punctuation(0'@, '@').
punctuation(0':, ':').
punctuation(0';, ';').
Chris@0 611
%   skip_line(+Code0, +In, -Next)
%
%   Skip the remainder of a comment line.  Next is the character that
%   follows the line terminator (LF or CR), or -1 if the input ends
%   before a terminator is seen.
%
%   The end-of-file clause is required: without it a comment on the
%   last line of the input that has no trailing newline keeps reading
%   -1 from the stream and never terminates.

skip_line(-1, _, -1) :- !.
skip_line(0xA, In, C) :- !,
    get_code(In, C).
skip_line(0xD, In, C) :- !,
    get_code(In, C).
skip_line(_, In, C) :-
    get_code(In, C1),
    skip_line(C1, In, C).
Chris@0 620
%   turtle_ws(?Code)
%
%   White space characters: tab, line feed, carriage return and space.

turtle_ws(0x9).
turtle_ws(0xA).
turtle_ws(0xD).
turtle_ws(0x20).
Chris@0 626
%   syntax_error(+Stream, +Which)
%
%   Throw error(syntax_error(Which), Context) describing the current
%   position of Stream.  Context is
%
%     * file(File, LineNo, LinePos, CharIndex) if Stream has a
%       file_name property
%     * stream(Stream, LineNo, LinePos, CharIndex) otherwise
%
%   Streams without a file name (e.g. input passed as stream(In))
%   used to make this predicate fail silently, so the caller saw a
%   plain failure instead of the intended syntax error.

syntax_error(Stream, Which) :-
    line_count(Stream, LineNo),
    line_position(Stream, LinePos),
    character_count(Stream, CharIndex),
    (   stream_property(Stream, file_name(File))
    ->  Context = file(File, LineNo, LinePos, CharIndex)
    ;   Context = stream(Stream, LineNo, LinePos, CharIndex)
    ),
    throw(error(syntax_error(Which), Context)).
Chris@0 634
Chris@0 635
Chris@0 636 /*******************************
Chris@0 637 * HOOK *
Chris@0 638 *******************************/
Chris@0 639
:- multifile
    rdf_io:load_triples/3,
    rdf_io:get_triples/4.

%   rdf_io:load_triples(+Format, +Input, +Options)
%
%   Hook clause for the `turtle` format: parse Input and assert every
%   resulting triple into the RDF database.  The value of the
%   base_uri option (default []) is passed as the fourth argument of
%   rdf_assert/4.

rdf_io:load_triples(turtle, Input, Options) :- !,
    debug(turtle, 'Loading turtle data from ~w', [Input]),
    rdf_load_turtle_file(Input, Triples, Options),
    option(base_uri(DB), Options, []),
    length(Triples, Count),
    debug(turtle, 'Loaded ~D triples into ~w', [Count, DB]),
    assert_triples(Triples, DB).

%   assert_triples(+Triples, +DB)
%
%   Assert each rdf(S,P,O) in Triples using rdf_assert/4 with DB.

assert_triples([], _DB).
assert_triples([rdf(Subject, Predicate, Object)|Rest], DB) :-
    rdf_assert(Subject, Predicate, Object, DB),
    assert_triples(Rest, DB).
Chris@0 656
Chris@0 657
%   rdf_io:get_triples(+Format, +Input, -Triples, +Options)
%
%   Hook clause for the `turtle` format: parse Input and return the
%   triples without asserting them.

rdf_io:get_triples(turtle, Input, Triples, Options) :- !,
    debug(turtle, 'Loading turtle data from ~w', [Input]),
    rdf_load_turtle_file(Input, Triples, Options).